├── xde ├── .gitignore ├── xde.conf ├── .cargo │ └── config.toml ├── xde-link │ ├── .cargo │ │ └── config.toml │ ├── map.devfsadm-externs │ ├── Cargo.toml │ ├── i686-unknown-illumos.json │ ├── build.rs │ └── src │ │ └── lib.rs ├── rust-toolchain.toml ├── src │ ├── ip.h │ ├── ip-bindgen.sh │ ├── secpolicy.rs │ ├── sys.rs │ ├── dls │ │ └── sys.rs │ └── lib.rs ├── Cargo.toml ├── x86_64-unknown-unknown.json └── README.md ├── .gitignore ├── fuzz ├── .gitignore ├── fuzz_targets │ ├── parse-in.rs │ └── parse-out.rs └── Cargo.toml ├── rust-toolchain.toml ├── crates ├── README.adoc ├── kstat-macro │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── illumos-sys-hdrs │ └── Cargo.toml ├── opte-api │ ├── check-api-version.sh │ ├── src │ │ ├── ulp.rs │ │ ├── encap.rs │ │ ├── tcp.rs │ │ ├── dhcpv6.rs │ │ ├── lib.rs │ │ ├── mac.rs │ │ └── ndp.rs │ ├── check-api-version.awk │ └── Cargo.toml └── derror-macro │ ├── Cargo.toml │ └── src │ └── lib.rs ├── pkg ├── .gitignore ├── clean.sh ├── print-api-version.sh ├── build.sh └── opte.template.p5m ├── lib ├── opte │ ├── dns-lookup-host.pcap │ ├── dns-lookup-guest.pcap │ ├── .gitignore │ ├── src │ │ ├── ddi │ │ │ └── mod.rs │ │ ├── engine │ │ │ ├── udp.rs │ │ │ ├── icmp │ │ │ │ └── mod.rs │ │ │ ├── tcp.rs │ │ │ ├── arp.rs │ │ │ └── port │ │ │ │ └── meta.rs │ │ └── dynamic.rs │ ├── build.rs │ ├── process-flow.md │ └── Cargo.toml ├── README.adoc ├── opte-test-utils │ ├── README.adoc │ ├── Cargo.toml │ └── src │ │ ├── pcap.rs │ │ └── dhcp.rs ├── oxide-vpc │ ├── tests │ │ ├── resources │ │ │ ├── data │ │ │ │ ├── crash-13f1436eb96d0a2d186c5568816951b80ed3a8d6 │ │ │ │ ├── crash-64c95b1326dd017e7d1eee75181ffb681b7f8670 │ │ │ │ └── crash-e5000c291b7eec7b56d08822d4b3315cc667dbee │ │ │ ├── parse_in │ │ │ │ └── ip6.ron │ │ │ └── parse_out │ │ │ │ └── ip4.ron │ │ └── fuzz_regression.rs │ ├── .gitignore │ ├── src │ │ ├── engine │ │ │ └── gateway │ │ │ │ ├── icmp.rs │ │ │ │ ├── arp.rs │ │ │ │ ├── dhcpv6.rs │ │ │ │ ├── transit.rs │ │ │ │ ├── dhcp.rs │ │ │ │ 
└── icmpv6.rs │ │ └── lib.rs │ └── Cargo.toml └── opte-ioctl │ └── Cargo.toml ├── bench ├── .cargo │ └── config.toml ├── src │ ├── lib.rs │ ├── iperf.rs │ └── kbench │ │ ├── mod.rs │ │ └── workload.rs ├── Cargo.toml ├── Gimlet.adoc └── README.adoc ├── xde-tests ├── .cargo │ └── config.toml ├── Cargo.toml └── tests │ └── loopback.rs ├── rustfmt.toml ├── .cargo └── config.toml ├── dtrace ├── protos.d ├── opte-count-cycles.d ├── opte-ioctl.d ├── usdt-opte-flow-expire.d ├── usdt-uft-invalidate.d ├── usdt-opte-tcp-flow-state.d ├── usdt-opte-tcp-flow-drop.d ├── opte-next-hop.d ├── usdt-opte-layer-process.d ├── usdt-port-process.d ├── opte-guest-loopback.d ├── opte-gen-ht-fail.d ├── usdt-opte-rule-match.d ├── opte-gen-desc-fail.d ├── opte-flow-expire.d ├── opte-trace ├── opte-uft-hit.d ├── opte-tcp-flow-state.d ├── lib │ └── common.d ├── opte-uft-invalidate.d ├── opte-ht.d ├── opte-rule-match.d ├── opte-layer-process.d ├── opte-port-process.d ├── opte-bad-packet.d ├── common.h └── README.adoc ├── .github ├── renovate.json └── buildomat │ ├── config.toml │ ├── jobs │ ├── opte-ioctl.sh │ ├── opte-api.sh │ ├── opte.sh │ ├── oxide-vpc.sh │ ├── opteadm.sh │ ├── p5p.sh │ ├── test.sh │ ├── bench.sh │ └── xde.sh │ └── common.sh ├── preflight.sh ├── bin └── opteadm │ ├── src │ └── lib.rs │ ├── Cargo.toml │ └── build.rs ├── xtask └── Cargo.toml ├── tools └── flamegraph-xde.sh ├── CONTRIBUTING.adoc ├── README.adoc └── Cargo.toml /xde/.gitignore: -------------------------------------------------------------------------------- 1 | xde 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | target 3 | download 4 | .DS_STORE 5 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | 
coverage 5 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.90.0" 3 | profile = "default" 4 | -------------------------------------------------------------------------------- /xde/xde.conf: -------------------------------------------------------------------------------- 1 | # xde kernel module configuration file 2 | 3 | name="xde" parent="pseudo" instance=0; 4 | -------------------------------------------------------------------------------- /crates/README.adoc: -------------------------------------------------------------------------------- 1 | = Internal OPTE crates 2 | 3 | This folder contains crates used by OPTE internally. 4 | -------------------------------------------------------------------------------- /pkg/.gitignore: -------------------------------------------------------------------------------- 1 | packages 2 | proto 3 | repo 4 | opte.final.p5m 5 | opte.generate.p5m 6 | opte.base.p5m 7 | -------------------------------------------------------------------------------- /lib/opte/dns-lookup-host.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oxidecomputer/opte/HEAD/lib/opte/dns-lookup-host.pcap -------------------------------------------------------------------------------- /lib/README.adoc: -------------------------------------------------------------------------------- 1 | = Public OPTE libs 2 | 3 | This folder contains the public API crates for interacting with OPTE. 
4 | -------------------------------------------------------------------------------- /lib/opte/dns-lookup-guest.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oxidecomputer/opte/HEAD/lib/opte/dns-lookup-guest.pcap -------------------------------------------------------------------------------- /xde/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | target = "x86_64-unknown-unknown.json" 3 | 4 | [unstable] 5 | build-std = ["core", "alloc"] -------------------------------------------------------------------------------- /xde/xde-link/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | 2 | [build] 3 | target = "i686-unknown-illumos.json" 4 | 5 | [unstable] 6 | build-std = ["core"] -------------------------------------------------------------------------------- /pkg/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | rm -rf proto 6 | rm -rf packages 7 | rm -f opte.final.p5m opte.generate.p5m opte.base.p5m 8 | -------------------------------------------------------------------------------- /lib/opte-test-utils/README.adoc: -------------------------------------------------------------------------------- 1 | = OPTE Test Utilities 2 | 3 | Functions and harnesses for creating pre-configured VPCs and packets for testing and benchmarking. 
-------------------------------------------------------------------------------- /bench/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | # https://github.com/rust-lang/cargo/issues/3946#issuecomment-973132993 2 | [env] 3 | CARGO_WORKSPACE_DIR = { value = "..", relative = true } 4 | -------------------------------------------------------------------------------- /xde-tests/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | # https://github.com/rust-lang/cargo/issues/3946#issuecomment-973132993 2 | [env] 3 | CARGO_WORKSPACE_DIR = { value = "..", relative = true } 4 | -------------------------------------------------------------------------------- /xde/xde-link/map.devfsadm-externs: -------------------------------------------------------------------------------- 1 | $mapfile_version 2 2 | 3 | SYMBOL_SCOPE { 4 | global: 5 | devfsadm_mklink { FLAGS = PARENT }; 6 | devfsadm_rm_all { FLAGS = PARENT }; 7 | }; -------------------------------------------------------------------------------- /xde/rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "nightly-2025-09-19" 3 | target = "x86_64-unknown-illumos" 4 | components = [ "clippy", "rustfmt", "rust-src" ] 5 | profile = "minimal" 6 | -------------------------------------------------------------------------------- /lib/opte/.gitignore: -------------------------------------------------------------------------------- 1 | gateway_icmpv4_ping.pcap 2 | overlay_guest_to_guest-guest-1.pcap 3 | overlay_guest_to_guest-guest-2.pcap 4 | overlay_guest_to_guest-phys-1.pcap 5 | overlay_guest_to_guest-phys-2.pcap -------------------------------------------------------------------------------- /pkg/print-api-version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # This script assumes you are 
running it from the opte/pkg dir. 4 | 5 | grep 'API_VERSION' ../crates/opte-api/src/lib.rs | awk '{ print $6 }' | sed 's/;//' 6 | -------------------------------------------------------------------------------- /xde/src/ip.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | -------------------------------------------------------------------------------- /lib/oxide-vpc/tests/resources/data/crash-13f1436eb96d0a2d186c5568816951b80ed3a8d6: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oxidecomputer/opte/HEAD/lib/oxide-vpc/tests/resources/data/crash-13f1436eb96d0a2d186c5568816951b80ed3a8d6 -------------------------------------------------------------------------------- /lib/oxide-vpc/tests/resources/data/crash-64c95b1326dd017e7d1eee75181ffb681b7f8670: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oxidecomputer/opte/HEAD/lib/oxide-vpc/tests/resources/data/crash-64c95b1326dd017e7d1eee75181ffb681b7f8670 -------------------------------------------------------------------------------- /lib/oxide-vpc/tests/resources/data/crash-e5000c291b7eec7b56d08822d4b3315cc667dbee: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oxidecomputer/opte/HEAD/lib/oxide-vpc/tests/resources/data/crash-e5000c291b7eec7b56d08822d4b3315cc667dbee -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | # It's nice to have some right hand margin on comments. 
2 | comment_width = 74 3 | max_width = 80 4 | use_small_heuristics = "max" 5 | imports_granularity = "Item" 6 | style_edition = "2024" 7 | edition = "2024" 8 | -------------------------------------------------------------------------------- /lib/oxide-vpc/tests/resources/parse_in/ip6.ron: -------------------------------------------------------------------------------- 1 | { 2 | "ddm-small-len": ( 3 | description: "Incorrect DDM extension length handling leads to underflow.", 4 | packet: "crash-e5000c291b7eec7b56d08822d4b3315cc667dbee" 5 | ) 6 | } 7 | -------------------------------------------------------------------------------- /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [alias] 2 | xtask = "run --package xtask --" 3 | ubench = "bench --package opte-bench --bench userland --profile release-lto --" 4 | kbench = "bench --package opte-bench --bench xde --" 5 | 6 | [env] 7 | CARGO_WORKSPACE_DIR = { value = "", relative = true } 8 | -------------------------------------------------------------------------------- /dtrace/protos.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Definitions of the IP protocol numbers as an associative array. 
3 | */ 4 | BEGIN { 5 | protos[1] = "ICMP"; 6 | protos[2] = "IGMP"; 7 | protos[6] = "TCP"; 8 | protos[17] = "UDP"; 9 | protos[58] = "ICMPv6"; 10 | protos[255] = "XXX"; 11 | } 12 | -------------------------------------------------------------------------------- /xde/xde-link/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "xde-link" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | build = "build.rs" 10 | 11 | [lib] 12 | crate-type = ["cdylib"] 13 | 14 | [dependencies] 15 | 16 | -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "local>oxidecomputer/renovate-config", 5 | "local>oxidecomputer/renovate-config//rust/autocreate", 6 | "local>oxidecomputer/renovate-config//actions/pin" 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /crates/kstat-macro/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "kstat-macro" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | [lib] 10 | proc-macro = true 11 | 12 | [dependencies] 13 | quote.workspace = true 14 | syn.workspace = true 15 | -------------------------------------------------------------------------------- /crates/illumos-sys-hdrs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "illumos-sys-hdrs" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | [features] 10 | default = [] 11 | kernel = [] 12 | 13 | [dependencies] 14 | 
bitflags.workspace = true 15 | -------------------------------------------------------------------------------- /.github/buildomat/config.toml: -------------------------------------------------------------------------------- 1 | # 2 | # This file, with this flag, must be present in the default branch in 3 | # order for the buildomat integration to create check suites. 4 | # 5 | enable = true 6 | org_only = true 7 | 8 | allow_users = [ 9 | "oxide-reflector-bot[bot]", 10 | "oxide-renovate[bot]", 11 | ] 12 | -------------------------------------------------------------------------------- /preflight.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script can be helpful for catching issues locally before paying the CI 4 | # tax. 5 | 6 | ./.github/buildomat/jobs/opte.sh 7 | ./.github/buildomat/jobs/opteadm.sh 8 | ./.github/buildomat/jobs/test.sh 9 | ./.github/buildomat/jobs/xde.sh 10 | ./.github/buildomat/jobs/oxide-vpc.sh 11 | -------------------------------------------------------------------------------- /crates/opte-api/check-api-version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # If there is a change to an opte-api source file relative to the `master` 4 | # branch, then verify that the API_VERSION value has increased. 
5 | if git diff master..HEAD | grep '^diff.*opte-api/src' 6 | then 7 | git diff master..HEAD | awk -f check-api-version.awk 8 | fi 9 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/parse-in.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use libfuzzer_sys::fuzz_target; 4 | use opte::ddi::mblk::MsgBlk; 5 | use opte::engine::packet::Packet; 6 | use oxide_vpc::engine::VpcParser; 7 | 8 | fuzz_target!(|data: &[u8]| { 9 | let mut pkt_m = MsgBlk::copy(data); 10 | let _ = Packet::parse_inbound(pkt_m.iter_mut(), VpcParser {}); 11 | }); 12 | -------------------------------------------------------------------------------- /crates/derror-macro/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "derror-macro" 3 | version = "0.1.0" 4 | edition.workspace = true 5 | license.workspace = true 6 | repository.workspace = true 7 | 8 | [lib] 9 | proc-macro = true 10 | 11 | [dependencies] 12 | darling.workspace = true 13 | proc-macro2.workspace = true 14 | quote.workspace = true 15 | syn.workspace = true 16 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/parse-out.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use libfuzzer_sys::fuzz_target; 4 | use opte::ddi::mblk::MsgBlk; 5 | use opte::engine::packet::Packet; 6 | use oxide_vpc::engine::VpcParser; 7 | 8 | fuzz_target!(|data: &[u8]| { 9 | let mut pkt_m = MsgBlk::copy(data); 10 | let _ = Packet::parse_outbound(pkt_m.iter_mut(), VpcParser {}); 11 | }); 12 | -------------------------------------------------------------------------------- /lib/oxide-vpc/.gitignore: -------------------------------------------------------------------------------- 1 | gateway_icmpv[46]_ping.pcap 2 | gateway_*_advert_reply.pcap 3 | overlay_guest_to_guest-guest-1.pcap 4 
| overlay_guest_to_guest-guest-2.pcap 5 | overlay_guest_to_guest-phys-1.pcap 6 | overlay_guest_to_guest-phys-2.pcap 7 | dhcpv6_solicit_reply.pcap 8 | guest_to_internet_ipv[46].pcap 9 | snat-v[46]-echo-id.pcap 10 | icmp[46]_inner_rewrite.pcap 11 | -------------------------------------------------------------------------------- /bin/opteadm/src/lib.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | //! OPTE driver administration library 8 | 9 | include!(concat!(env!("OUT_DIR"), "/gen.rs")); 10 | -------------------------------------------------------------------------------- /xtask/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "xtask" 3 | version = "0.1.0" 4 | edition.workspace = true 5 | license.workspace = true 6 | repository.workspace = true 7 | 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 9 | 10 | [dependencies] 11 | anyhow.workspace = true 12 | cargo_metadata.workspace = true 13 | clap.workspace = true 14 | toml.workspace = true 15 | -------------------------------------------------------------------------------- /lib/opte/src/ddi/mod.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! Various abstractions for using the illumos DDI/DKI. 
8 | 9 | pub mod kstat; 10 | pub mod mblk; 11 | pub mod sync; 12 | pub mod time; 13 | -------------------------------------------------------------------------------- /xde/src/ip-bindgen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ -z "$ILLUMOS_GATE" ]]; then 4 | echo "Must set ILLUMOS_GATE environment variable to gate source dir" 5 | exit 1 6 | fi 7 | 8 | export LD_LIBRARY_PATH=/opt/ooce/clang-12.0/lib/ 9 | 10 | bindgen ip.h \ 11 | --no-layout-tests \ 12 | --ctypes-prefix=illumos_sys_hdrs \ 13 | --use-core \ 14 | -- \ 15 | -I$ILLUMOS_GATE/usr/src/uts/common \ 16 | -D_KERNEL \ 17 | > ip.rs 18 | -------------------------------------------------------------------------------- /dtrace/opte-count-cycles.d: -------------------------------------------------------------------------------- 1 | xde_mc_tx:entry { 2 | self->tx_ts = vtimestamp; 3 | } 4 | 5 | xde_rx:entry { 6 | self->rx_ts = vtimestamp; 7 | } 8 | 9 | xde_mc_tx:return /self->tx_ts/ { 10 | @time["tx"] = lquantize((vtimestamp - self->tx_ts), 256, 32768, 256); 11 | self->tx_ts = 0; 12 | } 13 | 14 | xde_rx:return /self->rx_ts/ { 15 | @time["rx"] = lquantize((vtimestamp - self->rx_ts), 256, 32768, 256); 16 | self->rx_ts = 0; 17 | } 18 | 19 | END { 20 | 21 | } 22 | -------------------------------------------------------------------------------- /lib/oxide-vpc/tests/resources/parse_out/ip4.ron: -------------------------------------------------------------------------------- 1 | { 2 | "overlong-ihl": ( 3 | description: "Packet has both bad version and IHL longer than the entire packet body.", 4 | packet: "crash-64c95b1326dd017e7d1eee75181ffb681b7f8670" 5 | ), 6 | "underlong-pkt-len": ( 7 | description: "Packet has total_len shorter than the end of an apparent TCP header, and shorter than the full packet.", 8 | packet: "crash-13f1436eb96d0a2d186c5568816951b80ed3a8d6" 9 | ) 10 | } 11 | 
-------------------------------------------------------------------------------- /tools/flamegraph-xde.sh: -------------------------------------------------------------------------------- 1 | # A little messy right now, written as though it is exec'd from within 2 | # a git clone of FlameGraph. 3 | 4 | pfexec dtrace -x stackframes=100 -n 'profile-201us /arg0/ { @[stack()] = count(); } tick-120s { exit(0); }' -o out.stacks 5 | ./stackcollapse.pl out.stacks > out.folded 6 | cat out.folded | grep xde_rx > rx.folded 7 | cat out.folded | grep xde_mc_tx > tx.folded 8 | 9 | ./flamegraph.pl rx.folded > ~/rx.svg 10 | ./flamegraph.pl tx.folded > ~/tx.svg 11 | -------------------------------------------------------------------------------- /crates/opte-api/src/ulp.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2022 Oxide Computer Company 6 | 7 | /// Port 0 is reserved by the sockets layer. It is used by clients to 8 | /// indicate they want the operating system to choose a port on their 9 | /// behalf. 10 | pub const DYNAMIC_PORT: u16 = 0; 11 | -------------------------------------------------------------------------------- /xde/src/secpolicy.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | // stuff we need from secpolicy 8 | 9 | use illumos_sys_hdrs::c_int; 10 | use illumos_sys_hdrs::cred_t; 11 | 12 | unsafe extern "C" { 13 | pub fn secpolicy_dl_config(cr: *const cred_t) -> c_int; 14 | } 15 | -------------------------------------------------------------------------------- /lib/opte-ioctl/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "opte-ioctl" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | [dependencies] 10 | opte = { workspace = true, default-features = false, features = ["api"] } 11 | oxide-vpc = { workspace = true, default-features = false, features = ["api"] } 12 | 13 | libc.workspace = true 14 | postcard.workspace = true 15 | serde.workspace = true 16 | thiserror.workspace = true 17 | libnet.workspace = true 18 | -------------------------------------------------------------------------------- /lib/opte-test-utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "opte-test-utils" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | [features] 10 | usdt = ["oxide-vpc/usdt"] 11 | 12 | [dependencies] 13 | anyhow.workspace = true 14 | opte = { workspace = true, features = ["std"] } 15 | oxide-vpc = { workspace = true, features = ["engine", "std", "test-help"] } 16 | pcap-parser = { workspace = true, features = ["serialize"] } 17 | smoltcp.workspace = true 18 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/opte-ioctl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "opte-ioctl" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: rust_toolchain = true 7 | #: output_rules = 
[] 8 | #: 9 | 10 | set -o errexit 11 | set -o pipefail 12 | set -o xtrace 13 | 14 | source .github/buildomat/common.sh 15 | 16 | cargo --version 17 | rustc --version 18 | 19 | cd lib/opte-ioctl 20 | 21 | header "check style" 22 | ptime -m cargo +$NIGHTLY fmt -- --check 23 | 24 | header "analyze" 25 | ptime -m cargo clippy --all-targets 26 | -------------------------------------------------------------------------------- /lib/opte/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rerun-if-changed=build.rs"); 3 | println!("cargo::rustc-check-cfg=cfg(usdt_stable_asm)"); 4 | println!("cargo::rustc-check-cfg=cfg(usdt_stable_asm_sym)"); 5 | 6 | if version_check::is_min_version("1.59").unwrap_or(false) { 7 | println!("cargo:rustc-cfg=usdt_stable_asm"); 8 | } 9 | 10 | #[cfg(target_os = "macos")] 11 | if version_check::is_min_version("1.66").unwrap_or(false) { 12 | println!("cargo:rustc-cfg=usdt_stable_asm_sym"); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /dtrace/opte-ioctl.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track the OPTE command ioctls as they come in. 3 | * 4 | * dtrace -L ./lib -I . 
-Cqs ./opte-ioctl.d 5 | */ 6 | xde_ioc_opte_cmd:entry { 7 | this->opte_cmd_ioctl = (opte_cmd_ioctl_t *)arg0; 8 | print(*this->opte_cmd_ioctl); 9 | printf("\n"); 10 | self->t = 1; 11 | } 12 | 13 | ddi_copyin:entry /self->t/ { 14 | printf("ddi_copyin(%p, %p, %u, 0x%x) =>", arg0, arg1, arg2, arg3); 15 | } 16 | 17 | ddi_copyin:return /self->t/ { 18 | printf(" %d\n", arg1); 19 | } 20 | 21 | xde_ioc_opte_cmd:return { 22 | self->t = 0; 23 | } 24 | -------------------------------------------------------------------------------- /xde-tests/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "xde-tests" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | [dependencies] 10 | opte-ioctl.workspace = true 11 | opte-test-utils.workspace = true 12 | oxide-vpc.workspace = true 13 | 14 | anyhow.workspace = true 15 | libnet.workspace = true 16 | rand.workspace = true 17 | slog.workspace = true 18 | slog-async.workspace = true 19 | slog-envlogger.workspace = true 20 | slog-term.workspace = true 21 | zone.workspace = true 22 | ztest.workspace = true 23 | -------------------------------------------------------------------------------- /dtrace/usdt-opte-flow-expire.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track flow expiration. 
3 | * 4 | * dtrace -ZCqs ./usdt-opte-flow-expire.d 5 | */ 6 | #define HDR_FMT "%-24s %-18s %-48s %-20s %s\n" 7 | /* HDR_FMT: one conversion per column (PORT, FT NAME, FLOW, LAST_HIT, NOW). */ 8 | BEGIN { 9 | printf(HDR_FMT, "PORT", "FT NAME", "FLOW", "LAST_HIT", "NOW"); 10 | num = 0; 11 | } 12 | 13 | flow-expired { 14 | this->port = copyinstr(arg0); 15 | this->layer = copyinstr(arg1); 16 | this->flow = copyinstr(arg2); 17 | this->last_hit = stringof(arg3); 18 | this->now = stringof(arg4); 19 | 20 | printf(HDR_FMT, this->port, this->layer, this->flow, this->last_hit, this->now); 21 | } 22 | -------------------------------------------------------------------------------- /dtrace/usdt-uft-invalidate.d: -------------------------------------------------------------------------------- 1 | #define HDR_FMT "%-8s %-3s %-43s %s\n" 2 | #define LINE_FMT "%-8s %-3s %-43s %u\n" 3 | 4 | BEGIN { 5 | printf(HDR_FMT, "PORT", "DIR", "FLOW", "EPOCH"); 6 | num = 0; 7 | } 8 | 9 | uft-invalidate { 10 | this->dir = json(copyinstr(arg0), "ok"); 11 | this->port = copyinstr(arg1); 12 | this->flow = copyinstr(arg2); 13 | this->epoch = arg3; 14 | num++; 15 | 16 | if (num >= 10) { 17 | printf(HDR_FMT, "PORT", "DIR", "FLOW", "EPOCH"); 18 | num = 0; 19 | } 20 | 21 | printf(LINE_FMT, this->port, this->dir, this->flow, this->epoch); 22 | } 23 | -------------------------------------------------------------------------------- /crates/opte-api/check-api-version.awk: -------------------------------------------------------------------------------- 1 | BEGIN { 2 | old_vsn = 0; 3 | new_vsn = 0; 4 | } 5 | 6 | /^-pub const API_VERSION: u64 = [0-9]+/ { 7 | old_vsn = ($6 + 0); 8 | } 9 | 10 | /^\+pub const API_VERSION: u64 = [0-9]+/ { 11 | new_vsn = ($6 + 0); 12 | } 13 | 14 | END { 15 | if (new_vsn <= old_vsn) { 16 | printf("FAILURE: The API_VERSION was not updated\n"); 17 | printf("\told: %u\n", old_vsn); 18 | printf("\tnew: %u\n", new_vsn); 19 | exit 1; 20 | } 21 | 22 | printf("SUCCESS: The API_VERSION was updated\n"); 23 | printf("\told: %u\n", old_vsn); 24 | printf("\tnew: %u\n", new_vsn); 25 | 
} 26 | -------------------------------------------------------------------------------- /dtrace/usdt-opte-tcp-flow-state.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track TCP state changes. 3 | * 4 | * dtrace -ZCqs ./usdt-opte-tcp-flow-state.d 5 | */ 6 | #define HDR_FMT "%-16s %-8s %-8s %s\n" 7 | 8 | BEGIN { 9 | printf(HDR_FMT, "PORT", "CURR", "NEW", "FLOW"); 10 | num = 0; 11 | } 12 | 13 | tcp-flow-state { 14 | this->port = copyinstr(arg0); 15 | this->flow = copyinstr(arg1); 16 | this->curr = copyinstr(arg2); 17 | this->new = copyinstr(arg3); 18 | num++; 19 | 20 | printf(HDR_FMT, this->port, this->curr, this->new, this->flow); 21 | 22 | if (num >= 10) { 23 | printf(HDR_FMT, "PORT", "CURR", "NEW", "FLOW"); 24 | num = 0; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "opte-fuzz" 3 | version = "0.0.0" 4 | publish = false 5 | edition = "2024" 6 | 7 | [package.metadata] 8 | cargo-fuzz = true 9 | 10 | [dependencies] 11 | libfuzzer-sys = "0.4" 12 | opte = { workspace = true, features = ["engine", "test-help", "usdt"] } 13 | oxide-vpc = { workspace = true, features = ["engine", "test-help", "usdt"] } 14 | 15 | [[bin]] 16 | name = "parse-in" 17 | path = "fuzz_targets/parse-in.rs" 18 | test = false 19 | doc = false 20 | bench = false 21 | 22 | [[bin]] 23 | name = "parse-out" 24 | path = "fuzz_targets/parse-out.rs" 25 | test = false 26 | doc = false 27 | bench = false 28 | -------------------------------------------------------------------------------- /crates/opte-api/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "opte-api" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | [features] 10 | default = ["std"] 
11 | std = ["ipnetwork"] 12 | 13 | [dependencies] 14 | illumos-sys-hdrs.workspace = true 15 | 16 | ingot.workspace = true 17 | ipnetwork = { workspace = true, optional = true } 18 | postcard.workspace = true 19 | serde.workspace = true 20 | 21 | [dependencies.smoltcp] 22 | workspace = true 23 | default-features = false 24 | features = ["alloc", "medium-ethernet", "proto-ipv4", "proto-ipv6", "proto-dhcpv4", "socket", "socket-raw"] 25 | -------------------------------------------------------------------------------- /xde-tests/tests/loopback.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | use anyhow::Result; 8 | 9 | #[test] 10 | fn test_xde_loopback() -> Result<()> { 11 | let topol = xde_tests::two_node_topology()?; 12 | 13 | // Now we should be able to ping b from a on the overlay. 
14 | _ = &topol.nodes[0] 15 | .zone 16 | .zone 17 | .zexec(&format!("ping {}", &topol.nodes[1].port.ip()))?; 18 | 19 | Ok(()) 20 | } 21 | -------------------------------------------------------------------------------- /xde/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "xde" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | [dependencies] 10 | illumos-sys-hdrs = { workspace = true, features = ["kernel"] } 11 | opte = { workspace = true, features = ["engine", "kernel"], default-features = false } 12 | oxide-vpc = { workspace = true, features = ["engine", "kernel"], default-features = false } 13 | 14 | ingot.workspace = true 15 | 16 | bitflags.workspace = true 17 | crc32fast.workspace = true 18 | postcard.workspace = true 19 | serde.workspace = true 20 | zerocopy.workspace = true 21 | 22 | [lib] 23 | crate-type = ["staticlib"] 24 | name = "xde" 25 | 26 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/opte-api.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "opte-api" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: rust_toolchain = true 7 | #: output_rules = [] 8 | #: 9 | 10 | set -o errexit 11 | set -o pipefail 12 | set -o xtrace 13 | 14 | source .github/buildomat/common.sh 15 | 16 | cargo --version 17 | rustc --version 18 | 19 | cd crates/opte-api 20 | 21 | header "check API_VERSION" 22 | ./check-api-version.sh 23 | 24 | header "check style" 25 | ptime -m cargo +$NIGHTLY fmt -- --check 26 | 27 | header "analyze std" 28 | ptime -m cargo clippy --all-targets 29 | 30 | header "analyze no_std" 31 | ptime -m cargo clippy --no-default-features --all-targets 32 | 33 | header "test" 34 | ptime -m cargo test 35 | 
-------------------------------------------------------------------------------- /dtrace/usdt-opte-tcp-flow-drop.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track TCP flow drops. These occur when either the packet doesn't 3 | * match the current expected state in the TCP state machine or when 4 | * the connection is CLOSED. 5 | * 6 | * dtrace -ZCqs ./usdt-opte-tcp-flow-drop.d 7 | */ 8 | #define HDR_FMT "%-16s %-48s %-24s %-8s\n" 9 | 10 | BEGIN { 11 | printf(HDR_FMT, "PORT", "FLOW", "STATE", "FLAGS"); 12 | num = 0; 13 | } 14 | 15 | tcp-flow-drop { 16 | this->port = copyinstr(arg0); 17 | this->flow = copyinstr(arg1); 18 | this->state = copyinstr(arg2); 19 | this->flags = copyinstr(arg3); 20 | num++; 21 | 22 | printf(HDR_FMT, this->port, this->flow, this->state, this->flags); 23 | 24 | if (num >= 10) { 25 | printf(HDR_FMT, "PORT", "FLOW", "STATE", "FLAGS"); 26 | num = 0; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /lib/opte/src/engine/udp.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! UDP headers. 
8 | 9 | use serde::Deserialize; 10 | use serde::Serialize; 11 | 12 | #[derive( 13 | Clone, 14 | Copy, 15 | Debug, 16 | Default, 17 | Deserialize, 18 | Eq, 19 | Ord, 20 | PartialEq, 21 | PartialOrd, 22 | Serialize, 23 | )] 24 | pub struct UdpPush { 25 | pub src: u16, 26 | pub dst: u16, 27 | } 28 | 29 | #[derive(Clone, Debug, Deserialize, Serialize)] 30 | pub struct UdpMod { 31 | src: Option<u16>, 32 | dst: Option<u16>, 33 | } 34 | -------------------------------------------------------------------------------- /dtrace/opte-next-hop.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track next hop resolution. 3 | * 4 | * dtrace -L ./lib -I . -Cqs ./opte-next-hop.d 5 | */ 6 | #include "common.h" 7 | 8 | #define HDR_FMT "%-24s %-24s %-17s %-17s %s\n" 9 | 10 | BEGIN { 11 | printf(HDR_FMT, "DEST", "GATEWAY", "SRC MAC", "DST MAC", "MSG"); 12 | num = 0; 13 | } 14 | 15 | next-hop { 16 | this->dst = (in6_addr_t *)arg0; 17 | this->gw = (in6_addr_t *)arg1; 18 | this->gw_eth_src = (uchar_t *)arg2; 19 | this->gw_eth_dst = (uchar_t *)arg3; 20 | this->msg = stringof(arg4); 21 | this->msg = this->msg == "" ? "--" : this->msg; 22 | 23 | ETH_FMT(this->gw_eth_src_s, this->gw_eth_src); 24 | ETH_FMT(this->gw_eth_dst_s, this->gw_eth_dst); 25 | printf(HDR_FMT, inet_ntoa6(this->dst), inet_ntoa6(this->gw), 26 | this->gw_eth_src_s, this->gw_eth_dst_s, this->msg); 27 | } 28 | -------------------------------------------------------------------------------- /bin/opteadm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "opteadm" 3 | version = "0.2.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | [dependencies] 10 | # XXX For the time being opteadm needs to set the engine feature to 11 | # get all the types. Once these types are moved to their appropriate 12 | # place this feature flag will be replaced/removed. 
13 | opte = { workspace = true, features = ["api", "std"] } 14 | opte-ioctl.workspace = true 15 | oxide-vpc = { workspace = true, features = ["api", "std"] } 16 | 17 | anyhow.workspace = true 18 | cfg-if.workspace = true 19 | clap.workspace = true 20 | libc.workspace = true 21 | libnet.workspace = true 22 | postcard.workspace = true 23 | serde.workspace = true 24 | tabwriter.workspace = true 25 | thiserror.workspace = true 26 | 27 | [build-dependencies] 28 | anyhow.workspace = true 29 | -------------------------------------------------------------------------------- /.github/buildomat/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install both toolchains required for OPTE. 4 | # We pin to both a specific nightly *and* a stable compiler version 5 | # due to XDE's reliance on unstable features. 6 | rustup show active-toolchain || rustup toolchain install 7 | 8 | pushd xde 9 | rustup show active-toolchain || rustup toolchain install 10 | export NIGHTLY=`rustup show active-toolchain -v | head -n 1 | cut -d' ' -f1` 11 | popd 12 | 13 | function header { 14 | echo "# ==== $* ==== #" 15 | } 16 | 17 | function install_pkg { 18 | set +o errexit 19 | pfexec pkg install $1 20 | exit_code=$? 21 | # 4 is the exit code returned from pkg when the package is already installed 22 | if [[ $exit_code -ne 0 ]] && [[ $exit_code -ne 4 ]]; then 23 | echo "package install failed for $1" 24 | exit 1 25 | fi 26 | set -o errexit 27 | } 28 | -------------------------------------------------------------------------------- /dtrace/usdt-opte-layer-process.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track layer processing. 
3 | * 4 | * dtrace -ZCqs ./usdt-opte-layer-process.d 5 | */ 6 | #define HDR_FMT "%-16s %-16s %-3s %-48s %-48s %s\n" 7 | 8 | BEGIN { 9 | printf(HDR_FMT, "PORT", "LAYER", "DIR", "FLOW BEFORE", "FLOW AFTER", 10 | "RES"); 11 | num = 0; 12 | } 13 | 14 | layer-process-return { 15 | this->dir = json(copyinstr(arg0), "ok.0"); 16 | this->port = json(copyinstr(arg0), "ok.1"); 17 | this->layer = copyinstr(arg1); 18 | this->flow_before = copyinstr(arg2); 19 | this->flow_after = copyinstr(arg3); 20 | this->res = copyinstr(arg4); 21 | num++; 22 | 23 | printf(HDR_FMT, this->port, this->layer, this->dir, this->flow_before, 24 | this->flow_after, this->res); 25 | 26 | if (num >= 10) { 27 | printf(HDR_FMT, "PORT", "LAYER", "DIR", "FLOW BEFORE", 28 | "FLOW AFTER", "RES"); 29 | num = 0; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /dtrace/usdt-port-process.d: -------------------------------------------------------------------------------- 1 | #define HDR_FMT "%-3s %-12s %-8s %-43s %-43s %-18s %s\n" 2 | #define LINE_FMT "%-3s %-12s %-8u %-43s %-43s 0x%-16p %s\n" 3 | 4 | BEGIN { 5 | printf(HDR_FMT, "DIR", "NAME", "EPOCH", "FLOW BEFORE", "FLOW AFTER", 6 | "MBLK", "RESULT"); 7 | num = 0; 8 | } 9 | 10 | port-process-return { 11 | this->dir = json(copyinstr(arg0), "ok.0"); 12 | this->name = json(copyinstr(arg0), "ok.1"); 13 | this->flow_before = json(copyinstr(arg1), "ok.0"); 14 | this->flow_after = json(copyinstr(arg1), "ok.1"); 15 | this->epoch = arg2; 16 | this->mp = arg3; 17 | this->res = copyinstr(arg4); 18 | num++; 19 | 20 | if (num >= 10) { 21 | printf(HDR_FMT, "DIR", "NAME", "EPOCH", "FLOW BEFORE", 22 | "FLOW AFTER", "MBLK", "RESULT"); 23 | num = 0; 24 | } 25 | 26 | printf(LINE_FMT, this->dir, this->name, this->epoch, 27 | this->flow_before, this->flow_after, this->mp, this->res); 28 | } 29 | -------------------------------------------------------------------------------- /xde/xde-link/i686-unknown-illumos.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "arch": "x86", 3 | "cpu": "pentium4", 4 | "data-layout": "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128", 5 | "dynamic-linking": true, 6 | "eh-frame-header": false, 7 | "frame-pointer": "always", 8 | "has-rpath": true, 9 | "is-like-solaris": true, 10 | "late-link-args": { 11 | "gcc": [ 12 | "-lc", 13 | "-lssp" 14 | ] 15 | }, 16 | "limit-rdylib-exports": false, 17 | "linker-is-gnu": false, 18 | "llvm-target": "i686-pc-solaris", 19 | "max-atomic-width": 64, 20 | "os": "illumos", 21 | "panic-strategy": "abort", 22 | "pre-link-args": { 23 | "gcc": [ 24 | "-m32", 25 | "-std=c99", 26 | "-Wl,-z,ignore" 27 | ] 28 | }, 29 | "supported-sanitizers": [ 30 | "address", 31 | "cfi" 32 | ], 33 | "target-family": [ 34 | "unix" 35 | ], 36 | "target-pointer-width": 32 37 | } 38 | -------------------------------------------------------------------------------- /bench/src/lib.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! Utilities and tools for performing, recording, and processing 8 | //! benchmarks from local runners or DTrace output into `criterion`. 9 | 10 | use criterion::measurement::Measurement; 11 | use criterion::measurement::WallTime; 12 | 13 | #[cfg(feature = "alloc")] 14 | pub mod alloc; 15 | pub mod dtrace; 16 | pub mod iperf; 17 | pub mod kbench; 18 | pub mod packet; 19 | 20 | /// Additional labelling information for [`Measurement`]s for 21 | /// pretty-printing and grouping. 
22 | pub trait MeasurementInfo: Measurement { 23 | fn label() -> &'static str; 24 | } 25 | 26 | impl MeasurementInfo for WallTime { 27 | fn label() -> &'static str { 28 | "wallclock" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /crates/opte-api/src/encap.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | pub use ingot::geneve::Vni; 8 | 9 | #[cfg(test)] 10 | mod test { 11 | use super::*; 12 | 13 | #[test] 14 | fn good_vni() { 15 | assert!(Vni::new(0u32).is_ok()); 16 | assert!(Vni::new(11u8).is_ok()); 17 | assert!(Vni::new((1u32 << 24) - 1).is_ok()); 18 | } 19 | 20 | #[test] 21 | fn bad_vni() { 22 | assert!(Vni::new(2u32.pow(24)).is_err()); 23 | assert!(Vni::new(2u32.pow(30)).is_err()); 24 | } 25 | 26 | #[test] 27 | fn vni_round_trip() { 28 | let vni = Vni::new(7777u32).unwrap(); 29 | assert_eq!([0x00, 0x1E, 0x61], vni.bytes()); 30 | assert_eq!(7777, u32::from(vni)); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /bench/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "opte-bench" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | 10 | [features] 11 | default = ["alloc"] 12 | alloc = [] 13 | 14 | [dependencies] 15 | anyhow.workspace = true 16 | clap.workspace = true 17 | criterion.workspace = true 18 | itertools = { workspace = true, features = ["use_std"] } 19 | nix.workspace = true 20 | rand.workspace = true 21 | opte.workspace = true 22 | opte-test-utils.workspace = true 23 | oxide-vpc.workspace = true 24 | serde = { 
default-features = true, workspace = true } 25 | serde_json.workspace = true 26 | 27 | [dev-dependencies] 28 | ctor.workspace = true 29 | smoltcp.workspace = true 30 | 31 | [target.'cfg(target_os = "illumos")'.dependencies] 32 | xde-tests = { path = "../xde-tests" } 33 | 34 | [target.'cfg(target_os = "illumos")'.dev-dependencies] 35 | ztest.workspace = true 36 | 37 | [[bench]] 38 | name = "userland" 39 | harness = false 40 | 41 | [[bench]] 42 | name = "xde" 43 | harness = false 44 | -------------------------------------------------------------------------------- /xde/x86_64-unknown-unknown.json: -------------------------------------------------------------------------------- 1 | { 2 | "arch": "x86_64", 3 | "code-model": "kernel", 4 | "cpu": "x86-64", 5 | "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", 6 | "disable-redzone": true, 7 | "dynamic-linking": false, 8 | "eh-frame-header": false, 9 | "frame-pointer": "always", 10 | "executables": true, 11 | "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,+soft-float", 12 | "has-rpath": true, 13 | "is-like-solaris": true, 14 | "limit-rdylib-exports": false, 15 | "linker": "ld", 16 | "llvm-target": "x86_64-none-none", 17 | "max-atomic-width": 64, 18 | "no-default-libraries": true, 19 | "function-sections": false, 20 | "os": "illumos", 21 | "panic-strategy": "abort", 22 | "relax-elf-relocations": false, 23 | "relocation-model": "static", 24 | "relro-level": "full", 25 | "rustc-abi": "x86-softfloat", 26 | "staticlib-prefix": "", 27 | "target-family": "unix", 28 | "target-pointer-width": 64 29 | } 30 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/opte.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "opte" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: rust_toolchain = true 7 | #: output_rules = [] 8 | #: 9 | 10 | 
set -o errexit 11 | set -o pipefail 12 | set -o xtrace 13 | 14 | source .github/buildomat/common.sh 15 | 16 | cargo --version 17 | rustc --version 18 | 19 | cd lib/opte 20 | 21 | header "check style" 22 | ptime -m cargo +$NIGHTLY fmt -- --check 23 | 24 | header "check docs" 25 | # 26 | # I believe this means any doc warnings in deps will cause this to 27 | # fail. Using a more targeted approach in the future might be nice. 28 | # 29 | # Use nightly which is needed for the `kernel` feature. 30 | RUSTDOCFLAGS="-D warnings" ptime -m \ 31 | cargo +$NIGHTLY doc --no-default-features --features=api,std,engine,kernel 32 | 33 | header "analyze std + api" 34 | ptime -m cargo clippy --all-targets 35 | 36 | header "analyze no_std + engine + kernel" 37 | ptime -m cargo +$NIGHTLY clippy --no-default-features --features engine,kernel 38 | 39 | header "test" 40 | ptime -m cargo test 41 | -------------------------------------------------------------------------------- /dtrace/opte-guest-loopback.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track guest loopback packets as they happen. 3 | * 4 | * dtrace -L ./lib -I . 
-Cqs ./opte-guest-loopback.d 5 | */ 6 | #include "common.h" 7 | #include "protos.d" 8 | 9 | #define HDR_FMT "%-43s %-12s %-12s\n" 10 | 11 | BEGIN { 12 | printf(HDR_FMT, "FLOW", "SRC PORT", "DST PORT"); 13 | num = 0; 14 | } 15 | 16 | guest-loopback { 17 | this->flow = (flow_id_sdt_arg_t *)arg1; 18 | this->src = stringof(arg2); 19 | this->dst = stringof(arg3); 20 | this->af = this->flow->af; 21 | /* num is bumped once per printed row by the per-AF clauses below. */ 22 | 23 | if (num >= 10) { 24 | printf(HDR_FMT, "FLOW", "SRC PORT", "DST PORT"); 25 | num = 0; 26 | } 27 | 28 | if (this->af != AF_INET && this->af != AF_INET6) { 29 | printf("BAD ADDRESS FAMILY: %d\n", this->af); 30 | } 31 | } 32 | 33 | guest-loopback /this->af == AF_INET/ { 34 | FLOW_FMT(this->s, this->flow); 35 | printf(HDR_FMT, this->s, this->src, this->dst); 36 | num++; 37 | } 38 | 39 | guest-loopback /this->af == AF_INET6/ { 40 | FLOW_FMT6(this->s, this->flow); 41 | printf(HDR_FMT, this->s, this->src, this->dst); 42 | num++; 43 | } 44 | 45 | 46 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/oxide-vpc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "oxide-vpc" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: rust_toolchain = true 7 | #: output_rules = [] 8 | #: 9 | 10 | set -o errexit 11 | set -o pipefail 12 | set -o xtrace 13 | 14 | source .github/buildomat/common.sh 15 | 16 | cargo --version 17 | rustc --version 18 | 19 | cd lib/oxide-vpc 20 | 21 | header "check style" 22 | ptime -m cargo +$NIGHTLY fmt -- --check 23 | 24 | header "check docs" 25 | # 26 | # I believe this means any doc warnings in deps will cause this to 27 | # fail. Using a more targeted approach in the future might be nice. 28 | # 29 | # Use nightly which is needed for the `kernel` feature. 
30 | RUSTDOCFLAGS="-D warnings" ptime -m \ 31 | cargo +$NIGHTLY doc --no-default-features --features=api,std,engine,kernel 32 | 33 | header "analyze std + api + usdt" 34 | ptime -m cargo clippy --features usdt --all-targets 35 | 36 | header "analyze no_std + engine + kernel" 37 | ptime -m cargo +$NIGHTLY clippy --no-default-features --features engine,kernel 38 | 39 | header "test" 40 | ptime -m cargo test 41 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/opteadm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "opteadm" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: rust_toolchain = true 7 | #: output_rules = [ 8 | #: "=/work/debug/opteadm", 9 | #: "=/work/debug/opteadm.debug.sha256", 10 | #: "=/work/release/opteadm", 11 | #: "=/work/release/opteadm.release.sha256", 12 | #: ] 13 | #: 14 | #: [[publish]] 15 | #: series = "release" 16 | #: name = "opteadm" 17 | #: from_output = "/work/release/opteadm" 18 | 19 | set -o errexit 20 | set -o pipefail 21 | set -o xtrace 22 | 23 | source .github/buildomat/common.sh 24 | 25 | cargo --version 26 | rustc --version 27 | 28 | pushd bin/opteadm 29 | 30 | header "check style" 31 | ptime -m cargo +$NIGHTLY fmt -- --check 32 | 33 | header "analyze" 34 | ptime -m cargo clippy --all-targets 35 | 36 | header "debug build" 37 | ptime -m cargo build 38 | 39 | header "release build" 40 | ptime -m cargo build --release 41 | 42 | popd 43 | 44 | for x in debug release 45 | do 46 | mkdir -p /work/$x 47 | cp target/$x/opteadm /work/$x/ 48 | sha256sum "target/$x/opteadm" > "/work/$x/opteadm.$x.sha256" 49 | done 50 | -------------------------------------------------------------------------------- /dtrace/opte-gen-ht-fail.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track StaticAction::gen_ht() failures. 3 | * 4 | * dtrace -L ./lib -I . 
-Cqs ./opte-gen-ht-fail.d 5 | */ 6 | #include "common.h" 7 | #include "protos.d" 8 | 9 | #define HDR_FMT "%-12s %-12s %-4s %-48s %s\n" 10 | 11 | BEGIN { 12 | 13 | printf(HDR_FMT, "PORT", "LAYER", "DIR", "FLOW", "MSG"); 14 | num = 0; 15 | } 16 | 17 | gen-ht-fail { 18 | this->port = stringof(arg0); 19 | this->layer = stringof(arg1); 20 | this->dir = DIR_STR(arg2); 21 | this->flow = (flow_id_sdt_arg_t *)arg3; 22 | this->msg = stringof(arg4); 23 | this->af = this->flow->af; 24 | num++; /* count events so the header below is reprinted every 10 */ 25 | if (this->af != AF_INET && this->af != AF_INET6) { 26 | printf("BAD ADDRESS FAMILY: %d\n", this->af); 27 | } 28 | 29 | if (num >= 10) { 30 | printf(HDR_FMT, "PORT", "LAYER", "DIR", "FLOW", "MSG"); 31 | num = 0; 32 | } 33 | } 34 | 35 | gen-ht-fail /this->af == AF_INET/ { 36 | FLOW_FMT(this->s, this->flow); 37 | printf(HDR_FMT, this->port, this->layer, this->dir, this->s, this->msg); 38 | } 39 | 40 | gen-ht-fail /this->af == AF_INET6/ { 41 | FLOW_FMT6(this->s, this->flow); 42 | printf(HDR_FMT, this->port, this->layer, this->dir, this->s, this->msg); 43 | } 44 | -------------------------------------------------------------------------------- /dtrace/usdt-opte-rule-match.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track rule match/no-match as it happens. This is the USDT version; 3 | * useful for debugging when running tests. 
4 | * 5 | * dtrace -ZCqs ./usdt-opte-rule-match.d 6 | */ 7 | #define HDR_FMT "%-8s %-12s %-6s %-3s %-43s %s\n" 8 | 9 | BEGIN { 10 | printf(HDR_FMT, "PORT", "LAYER", "MATCH", "DIR", "FLOW", "ACTION"); 11 | num = 0; 12 | } 13 | 14 | rule-match { 15 | this->port = copyinstr(arg0); 16 | this->layer = copyinstr(arg1); 17 | this->dir = json(copyinstr(arg2), "ok"); 18 | this->flow = copyinstr(arg3); 19 | this->action = copyinstr(arg4); 20 | 21 | printf(HDR_FMT, this->port, this->layer, "YES", this->dir, this->flow, 22 | this->action); 23 | } 24 | 25 | rule-no-match { 26 | this->port = copyinstr(arg0); 27 | this->layer = copyinstr(arg1); 28 | this->dir = json(copyinstr(arg2), "ok"); 29 | this->flow = copyinstr(arg3); 30 | 31 | printf(HDR_FMT, this->port, this->layer, "NO", this->dir, this->flow, 32 | "--"); 33 | } 34 | 35 | rule-match,rule-no-match { 36 | if (num >= 10) { 37 | printf(HDR_FMT, "PORT", "LAYER", "MATCH", "DIR", "FLOW", 38 | "ACTION"); 39 | num = 0; 40 | } 41 | 42 | num++; 43 | } 44 | -------------------------------------------------------------------------------- /dtrace/opte-gen-desc-fail.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track gen_desc() failures for stateful actions. 3 | * 4 | * dtrace -L ./lib -I . 
-Cqs ./opte-gen-desc-fail.d 5 | */ 6 | #include "common.h" 7 | #include "protos.d" 8 | 9 | #define HDR_FMT "%-12s %-12s %-4s %-48s %s\n" 10 | 11 | BEGIN { 12 | printf(HDR_FMT, "PORT", "LAYER", "DIR", "FLOW", "MSG"); 13 | num = 0; 14 | } 15 | 16 | gen-desc-fail { 17 | this->port = stringof(arg0); 18 | this->layer = stringof(arg1); 19 | this->dir = DIR_STR(arg2); 20 | this->flow = (flow_id_sdt_arg_t *)arg3; 21 | this->msg = stringof(arg4); 22 | this->af = this->flow->af; 23 | num++; 24 | 25 | if (this->af != AF_INET && this->af != AF_INET6) { 26 | printf("BAD ADDRESS FAMILY: %d\n", this->af); 27 | } 28 | 29 | if (num >= 10) { 30 | printf(HDR_FMT, "PORT", "LAYER", "DIR", "FLOW", "MSG"); 31 | num = 0; 32 | } 33 | } 34 | 35 | gen-desc-fail /this->af == AF_INET/ { 36 | FLOW_FMT(this->s, this->flow); 37 | printf(HDR_FMT, this->port, this->layer, this->dir, this->s, this->msg); 38 | } 39 | 40 | gen-desc-fail /this->af == AF_INET6/ { 41 | FLOW_FMT6(this->s, this->flow); 42 | printf(HDR_FMT, this->port, this->layer, this->dir, this->s, this->msg); 43 | } 44 | -------------------------------------------------------------------------------- /lib/opte/process-flow.md: -------------------------------------------------------------------------------- 1 | The size of the TCP flow table is currently 8096. 
2 | 3 | ```mermaid 4 | flowchart TD 5 | process_in([process_in]) --> is_def_id{flow_id == FLOW_ID_DEFAULT?}; 6 | is_def_id -- Yes --> lp[layers_process]; 7 | is_def_id -- No --> check_uft{UFT entry?}; 8 | check_uft -- Yes --> same_epoch{entry.epoch == port.epoch?}; 9 | check_uft -- No --> lp; 10 | same_epoch -- Yes --> run_ht[run HT]; 11 | same_epoch -- No --> inv[invalidate UFT entry]; 12 | inv --> lp; 13 | run_ht --> is_tcp_uft{TCP?}; 14 | is_tcp_uft -- Yes --> pite[process_in_tcp_existing]; 15 | is_tcp_uft -- No --> rm([return Modified]); 16 | lp --> lr{Layer Result?}; 17 | lr -- Allow --> uft_add[add UFT entry]; 18 | lr -- Deny --> rd([return Drop]); 19 | lr -- "Hairpin(hp)" --> rhp(["return Hairpin(hp)"]); 20 | lr -- "Err(e)" --> re(["return Err(e)"]); 21 | pitn -- "Ok(TcpState::Closed)" --> rd; 22 | pitn -- "Ok(tcp_state)" --> rm; 23 | pitn -- "Err(e)" --> re; 24 | pite -- "Ok(TcpState::Closed)" --> rd; 25 | pite -- "Ok(tcp_state)" --> rm; 26 | pite -- "Err(e)" --> re; 27 | uft_add --> is_tcp_no_uft{TCP?}; 28 | is_tcp_no_uft -- Yes --> pitn[process_in_tcp_new]; 29 | is_tcp_no_uft -- No --> rm; 30 | ``` 31 | -------------------------------------------------------------------------------- /dtrace/opte-flow-expire.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track flow expiration. 3 | * 4 | * XXX Would be nice to add lifetime Rx/Tx packets/bytes stats. 5 | * 6 | * dtrace -L ./lib -I . 
-Cqs ./opte-flow-expire.d 7 | */ 8 | #include "common.h" 9 | #include "protos.d" 10 | 11 | #define HDR_FMT "%-24s %-18s %s %s %s\n" 12 | #define LINE_FMT "%-24s %-18s %s %u %u\n" 13 | 14 | BEGIN { 15 | printf(HDR_FMT, "PORT", "FT NAME", "FLOW", "LAST_HIT", "NOW"); 16 | num = 0; 17 | } 18 | 19 | flow-expired { 20 | this->port = stringof(arg0); 21 | this->name = stringof(arg1); 22 | this->flow = (flow_id_sdt_arg_t *)arg2; 23 | this->last_hit = arg3; 24 | this->now = arg4; 25 | 26 | if (num >= 10) { 27 | printf(HDR_FMT, "PORT", "FT NAME", "FLOW", "LAST_HIT", "NOW"); 28 | num = 0; 29 | } 30 | 31 | this->af = this->flow->af; 32 | 33 | if (this->af != AF_INET && this->af != AF_INET6) { 34 | printf("BAD ADDRESS FAMILY: %d\n", this->af); 35 | } 36 | } 37 | 38 | flow-expired /this->af == AF_INET/ { 39 | FLOW_FMT(this->s, this->flow); 40 | printf(LINE_FMT, this->port, this->name, this->s, this->last_hit, this->now); 41 | num++; 42 | } 43 | 44 | flow-expired /this->af == AF_INET6/ { 45 | FLOW_FMT6(this->s, this->flow); 46 | printf(LINE_FMT, this->port, this->name, this->s, this->last_hit, this->now); 47 | num++; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /bin/opteadm/build.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
4 | 5 | // Copyright 2023 Oxide Computer Company 6 | 7 | use anyhow::Result; 8 | use anyhow::anyhow; 9 | use std::process::Command; 10 | 11 | fn main() -> Result<()> { 12 | println!("cargo:rerun-if-changed=../../.git/HEAD"); 13 | 14 | let commit_count_out = 15 | Command::new("git").args(["rev-list", "--count", "HEAD"]).output()?; 16 | 17 | if commit_count_out.status.success() { 18 | let commit_count: u64 = 19 | std::str::from_utf8(&commit_count_out.stdout)?.trim().parse()?; 20 | 21 | std::fs::write( 22 | std::env::var("OUT_DIR").unwrap() + "/gen.rs", 23 | format!( 24 | "\ 25 | // This file is autogenerated by build.rs -- do not edit! 26 | 27 | /// Number of git commits present at build time, used for OPTE versioning. 28 | pub const COMMIT_COUNT: u64 = {commit_count}; 29 | " 30 | ), 31 | )?; 32 | 33 | Ok(()) 34 | } else { 35 | let utf8_err = std::str::from_utf8(&commit_count_out.stderr).ok(); 36 | Err(anyhow!("Git commit count was unsuccessful: {utf8_err:?}")) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /dtrace/opte-trace: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Run the specified DTrace script. This is a convenience wrapper to 4 | # allow the use of a DTrace library and common header without 5 | # requiring the user to understand all these mechanisms. 6 | # 7 | # For this to work the DTrace scripts, lib, and common header must all 8 | # reside at their precise relative locations to this runner script. 9 | # It's easiest to just run these scripts from the OPTE repo itself. 10 | # 11 | # Usage 12 | # 13 | # opte-trace SCRIPT 14 | # 15 | # Example 16 | # 17 | # ~/foo/opte/dtrace/opte-trace opte-rule-match 18 | # 19 | MYDIR=$(dirname "$0") 20 | MYPROG=$(basename "$0") 21 | 22 | function print_help 23 | { 24 | echo "$MYPROG [-h] SCRIPT" 25 | echo 26 | printf '\tWhere SCRIPT is the filename of the script you want to run.\n' 
27 | printf '\tFor convenience, SCRIPT does not have to include the\n' 28 | printf '\t.d file extension\n' 29 | } 30 | 31 | while getopts "h" opt; do 32 | case $opt in 33 | h) 34 | print_help 35 | exit 0 36 | ;; 37 | esac 38 | done 39 | 40 | shift $((OPTIND - 1)) 41 | 42 | if (($# != 1)); then 43 | echo "ERROR: must specify single script" >&2 44 | echo 45 | print_help 46 | exit 1 47 | fi 48 | 49 | # Remove the .d extension to normalize the script name; add it back 50 | # when calling DTrace. 51 | script=${1%.d} 52 | 53 | dtrace -L "$MYDIR/lib" -I "$MYDIR" -Cqs "$MYDIR/${script}.d" 54 | -------------------------------------------------------------------------------- /dtrace/opte-uft-hit.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track UFT entry hits as they happen. A hit occurs whenever a packet 3 | * matches an existing flow table entry (in- or outbound) with the same 4 | * epoch as the port. This is the 'fast-path' of packet matching. 5 | * 6 | * dtrace -L ./lib -I . 
-Cqs ./opte-uft-hit.d 7 | */ 8 | #include "common.h" 9 | #include "protos.d" 10 | 11 | #define HDR_FMT "%-8s %-3s %-43s %s %s\n" 12 | #define LINE_FMT "%-8s %-3s %-43s %u %u\n" 13 | 14 | BEGIN { 15 | printf(HDR_FMT, "PORT", "DIR", "FLOW", "EPOCH", "LAST_HIT"); 16 | num = 0; 17 | } 18 | 19 | uft-hit { 20 | this->dir = DIR_STR(arg0); 21 | this->port = stringof(arg1); 22 | this->flow = (flow_id_sdt_arg_t *)arg2; 23 | this->epoch = arg3; 24 | this->af = this->flow->af; 25 | this->last_hit = arg4; 26 | 27 | if (num >= 10) { 28 | printf(HDR_FMT, "PORT", "DIR", "FLOW", "EPOCH", "LAST_HIT"); 29 | num = 0; 30 | } 31 | 32 | if (this->af != AF_INET && this->af != AF_INET6) { 33 | printf("BAD ADDRESS FAMILY: %d\n", this->af); 34 | } 35 | } 36 | 37 | uft-hit /this->af == AF_INET/ { 38 | FLOW_FMT(this->s, this->flow); 39 | printf(LINE_FMT, this->port, this->dir, this->s, this->epoch, this->last_hit); 40 | num++; 41 | } 42 | 43 | uft-hit /this->af == AF_INET6/ { 44 | FLOW_FMT6(this->s, this->flow); 45 | printf(LINE_FMT, this->port, this->dir, this->s, this->epoch, this->last_hit); 46 | num++; 47 | } 48 | 49 | -------------------------------------------------------------------------------- /dtrace/opte-tcp-flow-state.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track TCP flows as they change state. 3 | * 4 | * dtrace -L ./lib -I . -Cqs ./opte-tcp-flow-state.d 5 | */ 6 | #include "common.h" 7 | #include "protos.d" 8 | 9 | #define FMT "%-16s %-12s %-12s %s\n" 10 | 11 | BEGIN { 12 | /* 13 | * Use an associative array to stringify the TCP state 14 | * values. 
15 | */ 16 | tcp_states[0] = "CLOSED"; 17 | tcp_states[1] = "LISTEN"; 18 | tcp_states[2] = "SYN_SENT"; 19 | tcp_states[3] = "SYN_RCVD"; 20 | tcp_states[4] = "ESTABLISHED"; 21 | tcp_states[5] = "CLOSE_WAIT"; 22 | tcp_states[6] = "LAST_ACK"; 23 | tcp_states[7] = "FIN_WAIT_1"; 24 | tcp_states[8] = "FIN_WAIT_2"; 25 | tcp_states[9] = "TIME_WAIT"; 26 | 27 | printf(FMT, "PORT", "CURR", "NEW", "FLOW"); 28 | num = 0; 29 | } 30 | 31 | tcp-flow-state { 32 | this->port = stringof(arg0); 33 | this->flow = (flow_id_sdt_arg_t *)arg1; 34 | this->af = this->flow->af; 35 | this->curr = tcp_states[arg2]; 36 | this->new = tcp_states[arg3]; 37 | 38 | if (num >= 10) { 39 | printf(FMT, "PORT", "CURR", "NEW", "FLOW"); 40 | num = 0; 41 | } 42 | 43 | num++; 44 | } 45 | 46 | tcp-flow-state /this->af == AF_INET/ { 47 | FLOW_FMT(this->s, this->flow); 48 | printf(FMT, this->port, this->curr, this->new, this->s); 49 | } 50 | 51 | tcp-flow-state /this->af == AF_INET6/ { 52 | FLOW_FMT6(this->s, this->flow); 53 | printf(FMT, this->port, this->curr, this->new, this->s); 54 | } 55 | -------------------------------------------------------------------------------- /dtrace/lib/common.d: -------------------------------------------------------------------------------- 1 | #pragma D depends_on module ip 2 | #pragma D depends_on provider ip 3 | 4 | typedef struct flow_id_sdt_arg { 5 | uint8_t proto; 6 | uint16_t af; 7 | union addrs { 8 | struct { 9 | ipaddr_t src; 10 | ipaddr_t dst; 11 | } ip4; 12 | struct { 13 | in6_addr_t src; 14 | in6_addr_t dst; 15 | } ip6; 16 | } addrs; 17 | uint16_t src_port; 18 | uint16_t dst_port; 19 | } flow_id_sdt_arg_t; 20 | 21 | typedef struct rule_match_sdt_arg { 22 | char *port; 23 | char *layer; 24 | uintptr_t dir; 25 | flow_id_sdt_arg_t *flow; 26 | char *rule_type; 27 | } rule_match_sdt_arg_t; 28 | 29 | typedef struct rule_no_match_sdt_arg { 30 | char *port; 31 | char *layer; 32 | uintptr_t dir; 33 | flow_id_sdt_arg_t *flow; 34 | } rule_no_match_sdt_arg_t; 35 | 36 | typedef 
struct ht_run_sdt_arg { 37 | char *port; 38 | char *loc; 39 | uintptr_t dir; 40 | flow_id_sdt_arg_t *flow_before; 41 | flow_id_sdt_arg_t *flow_after; 42 | } ht_run_sdt_arg_t; 43 | 44 | typedef struct opte_cmd_ioctl { 45 | uint64_t api_version; 46 | int cmd; 47 | uint64_t flags; 48 | uint64_t reserved; 49 | char *req_bytes; 50 | size_t req_len; 51 | char *resp_bytes; 52 | size_t resp_len; 53 | size_t resp_len_actual; 54 | } opte_cmd_ioctl_t; 55 | 56 | typedef struct derror_sdt_arg { 57 | size_t len; 58 | uint8_t truncated; 59 | uint64_t data[2]; 60 | char* entry[8]; 61 | } derror_sdt_arg_t; 62 | -------------------------------------------------------------------------------- /dtrace/opte-uft-invalidate.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track UFT entry invalidations as they happen. An invalidation 3 | * occurs when the port's epoch has moved forward based on a rule 4 | * change but the UFT entry is based on an older epoch; therefore it 5 | * needs to be invalidated so that a new entry may be generated from 6 | * the current rule set. 7 | * 8 | * dtrace -L ./lib -I .
-Cqs ./opte-uft-invalidate.d 9 | */ 10 | #include "common.h" 11 | #include "protos.d" 12 | 13 | #define HDR_FMT "%-8s %-3s %-43s %s\n" 14 | #define LINE_FMT "%-8s %-3s %-43s %u\n" 15 | 16 | BEGIN { 17 | printf(HDR_FMT, "PORT", "DIR", "FLOW", "EPOCH"); 18 | num = 0; 19 | } 20 | 21 | uft-invalidate { 22 | this->dir = DIR_STR(arg0); 23 | this->port = stringof(arg1); 24 | this->flow = (flow_id_sdt_arg_t *)arg2; 25 | this->epoch = arg3; 26 | this->af = this->flow->af; 27 | 28 | if (num >= 10) { 29 | printf(HDR_FMT, "PORT", "DIR", "FLOW", "EPOCH"); 30 | num = 0; 31 | } 32 | 33 | if (this->af != AF_INET && this->af != AF_INET6) { 34 | printf("BAD ADDRESS FAMILY: %d\n", this->af); 35 | } 36 | } 37 | 38 | uft-invalidate /this->af == AF_INET/ { 39 | FLOW_FMT(this->s, this->flow); 40 | printf(LINE_FMT, this->port, this->dir, this->s, this->epoch); 41 | num++; 42 | } 43 | 44 | uft-invalidate /this->af == AF_INET6/ { 45 | FLOW_FMT6(this->s, this->flow); 46 | printf(LINE_FMT, this->port, this->dir, this->s, this->epoch); 47 | num++; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /lib/oxide-vpc/src/engine/gateway/icmp.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2023 Oxide Computer Company 6 | 7 | //! The ICMP implementation of the Virtual Gateway. 
8 | 9 | use crate::cfg::Ipv4Cfg; 10 | use crate::cfg::VpcCfg; 11 | use alloc::sync::Arc; 12 | use opte::api::Direction; 13 | use opte::api::OpteError; 14 | use opte::engine::icmp::v4::IcmpEchoReply; 15 | use opte::engine::layer::Layer; 16 | use opte::engine::rule::Action; 17 | use opte::engine::rule::Rule; 18 | 19 | pub fn setup( 20 | layer: &mut Layer, 21 | cfg: &VpcCfg, 22 | ip_cfg: &Ipv4Cfg, 23 | ) -> Result<(), OpteError> { 24 | // ================================================================ 25 | // ICMPv4 Echo Reply 26 | // ================================================================ 27 | let reply = Action::Hairpin(Arc::new(IcmpEchoReply { 28 | // Map an Echo from guest (src) -> gateway (dst) to an Echo 29 | // Reply from gateway (dst) -> guest (src). 30 | echo_src_mac: cfg.guest_mac, 31 | echo_src_ip: ip_cfg.private_ip, 32 | echo_dst_mac: cfg.gateway_mac, 33 | echo_dst_ip: ip_cfg.gateway_ip, 34 | })); 35 | let rule = Rule::new(1, reply); 36 | layer.add_rule(Direction::Out, rule.finalize()); 37 | Ok(()) 38 | } 39 | -------------------------------------------------------------------------------- /crates/opte-api/src/tcp.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | use core::fmt; 8 | use core::fmt::Display; 9 | use serde::Deserialize; 10 | use serde::Serialize; 11 | 12 | /// The standard TCP states. 13 | /// 14 | /// See Figure 13-8 of TCP/IP Illustrated Vol. 1 Ed. 
2 15 | #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] 16 | pub enum TcpState { 17 | Closed, 18 | Listen, 19 | SynSent, 20 | SynRcvd, 21 | Established, 22 | CloseWait, 23 | LastAck, 24 | FinWait1, 25 | FinWait2, 26 | TimeWait, 27 | } 28 | 29 | impl Display for TcpState { 30 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 31 | let s = match self { 32 | TcpState::Closed => "CLOSED", 33 | TcpState::Listen => "LISTEN", 34 | TcpState::SynSent => "SYN_SENT", 35 | TcpState::SynRcvd => "SYN_RCVD", 36 | TcpState::Established => "ESTABLISHED", 37 | TcpState::CloseWait => "CLOSE_WAIT", 38 | TcpState::LastAck => "LAST_ACK", 39 | TcpState::FinWait1 => "FIN_WAIT_1", 40 | TcpState::FinWait2 => "FIN_WAIT_2", 41 | TcpState::TimeWait => "TIME_WAIT", 42 | }; 43 | write!(f, "{s}") 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /dtrace/opte-ht.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track Header Transformations as they happen. 3 | * 4 | * This only applies to header transformations which occur as part of 5 | * layer/rule processing. If you are interested in flow modification 6 | * in the hot path, see the opte-port-process script. 7 | * 8 | * dtrace -L ./lib -I . 
-Cqs ./opte-ht.d 9 | */ 10 | #include "common.h" 11 | #include "protos.d" 12 | 13 | #define HDR_FMT "%-3s %-12s %-12s %-40s %-40s\n" 14 | 15 | BEGIN { 16 | printf(HDR_FMT, "DIR", "PORT", "LOCATION", "BEFORE", "AFTER"); 17 | num = 0; 18 | } 19 | 20 | ht-run { 21 | this->ht = (ht_run_sdt_arg_t*)arg0; 22 | this->dir = DIR_STR(this->ht->dir); 23 | this->port = stringof(this->ht->port); 24 | this->loc = stringof(this->ht->loc); 25 | this->before = this->ht->flow_before; 26 | this->after = this->ht->flow_after; 27 | this->af = this->before->af; 28 | num++; 29 | 30 | if (this->af != AF_INET && this->af != AF_INET6) { 31 | printf("BAD ADDRESS FAMILY: %d\n", this->af); 32 | } 33 | 34 | if (num >= 10) { 35 | printf(HDR_FMT, "DIR", "PORT", "LOCATION", "BEFORE", "AFTER"); 36 | num = 0; 37 | } 38 | } 39 | 40 | ht-run /this->af == AF_INET/ { 41 | FLOW_FMT(this->bs, this->before); 42 | FLOW_FMT(this->as, this->after); 43 | printf(HDR_FMT, this->dir, this->port, this->loc, this->bs, this->as); 44 | } 45 | 46 | ht-run /this->af == AF_INET6/ { 47 | FLOW_FMT6(this->bs, this->before); 48 | FLOW_FMT6(this->as, this->after); 49 | printf(HDR_FMT, this->dir, this->port, this->loc, this->bs, this->as); 50 | } 51 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/p5p.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "opte-p5p" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: rust_toolchain = true 7 | #: output_rules = [ 8 | #: "=/out/opte.p5p", 9 | #: "=/out/opte.p5p.sha256", 10 | #: ] 11 | #: 12 | #: [[publish]] 13 | #: series = "repo" 14 | #: name = "opte.p5p" 15 | #: from_output = "/out/opte.p5p" 16 | #: 17 | #: [[publish]] 18 | #: series = "repo" 19 | #: name = "opte.p5p.sha256" 20 | #: from_output = "/out/opte.p5p.sha256" 21 | #: 22 | 23 | set -o errexit 24 | set -o pipefail 25 | set -o xtrace 26 | 27 | source .github/buildomat/common.sh 28 | 29 | 
# 30 | # TGT_BASE allows one to run this more easily in their local 31 | # environment: 32 | # 33 | # TGT_BASE=/var/tmp ./p5p.sh 34 | # 35 | TGT_BASE=${TGT_BASE:=/work} 36 | 37 | REL_SRC=target/x86_64-unknown-unknown/release-lto 38 | REL_TGT=$TGT_BASE/release 39 | 40 | mkdir -p $REL_TGT 41 | 42 | cargo --version 43 | rustc --version 44 | 45 | header "build xde and opteadm (release+debug)" 46 | ptime -m cargo xtask build --profile all 47 | 48 | # 49 | # Inspect the kernel module for bad relocations in case the old 50 | # codegen issue ever shows its face again. 51 | # 52 | if elfdump $REL_SRC/xde | grep GOTPCREL; then 53 | echo "found GOTPCREL relocation in release build" 54 | exit 1 55 | fi 56 | 57 | header "package opte" 58 | cargo xtask package --skip-build --profile all 59 | 60 | banner copy 61 | pfexec mkdir -p /out 62 | pfexec chown "$UID" /out 63 | PKG_NAME="/out/opte.p5p" 64 | mv pkg/packages/repo/*.p5p "$PKG_NAME" 65 | sha256sum "$PKG_NAME" > "$PKG_NAME.sha256" 66 | -------------------------------------------------------------------------------- /lib/oxide-vpc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "oxide-vpc" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | # The api feature is used by consumers of the Oxide VPC 10 | # implementation, namely Omicron. 11 | # 12 | # The engine feature is used by the OPTE driver that provides the 13 | # Oxide VPC implementation, namely xde. 14 | [features] 15 | default = ["api", "std"] 16 | api = ["opte/api"] 17 | engine = ["api", "opte/engine", "dep:ingot", "dep:poptrie", "dep:smoltcp"] 18 | kernel = ["opte/kernel"] 19 | std = ["dep:tabwriter","opte/std"] 20 | # 21 | # XXX: This is a hack in order for integration tests to make use of 22 | # test-only methods.
23 | # 24 | # For a method/function to be used by both unit and integration tests 25 | # mark them with the following: 26 | # 27 | # #[cfg(any(feature = "test-help", test))] 28 | # 29 | test-help = ["opte/test-help"] 30 | usdt = ["opte/usdt"] 31 | 32 | [dependencies] 33 | illumos-sys-hdrs.workspace = true 34 | opte.workspace = true 35 | 36 | cfg-if.workspace = true 37 | ingot = { workspace = true, optional = true } 38 | poptrie = { workspace = true, optional = true } 39 | serde.workspace = true 40 | smoltcp = { workspace = true, optional = true } 41 | tabwriter = { workspace = true, optional = true } 42 | uuid.workspace = true 43 | zerocopy.workspace = true 44 | 45 | [dev-dependencies] 46 | ctor.workspace = true 47 | opte-test-utils = { workspace = true, features = ["usdt"] } 48 | pcap-parser = { workspace = true, features = ["serialize"] } 49 | ron.workspace = true 50 | usdt.workspace = true 51 | -------------------------------------------------------------------------------- /lib/oxide-vpc/src/lib.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2023 Oxide Computer Company 6 | 7 | //! The Oxide VPC Network. 8 | //! 9 | //! This module contains configuration that is specific to the "Oxide 10 | //! VPC Network"; the guest overlay network that we implement on an 11 | //! Oxide Rack. OPTE itself is a generic engine for performing packet 12 | //! transformations in a flow-centric manner. While it does provide 13 | //! primitive building blocks for implementing network functions, like 14 | //! rules and header transpositions, it does not dictate a specific 15 | //! network configuration. This module configures OPTE in a manner 16 | //!
consistent with the definition of The Oxide VPC Network in [RFD 17 | //! 21][rfd21] (User Networking API) and [RFD 63][rfd63] (Network Architecture). 18 | //! 19 | //! [rfd21]: https://rfd.shared.oxide.computer/rfd/0021 20 | //! 21 | //! [rfd63]: https://rfd.shared.oxide.computer/rfd/0063 22 | #![no_std] 23 | 24 | // NOTE: Things get weird if you move the extern crate into cfg_if!. 25 | #[cfg(any(feature = "std", test))] 26 | #[macro_use] 27 | extern crate std; 28 | 29 | #[macro_use] 30 | extern crate alloc; 31 | 32 | // TODO: move to own crate? 33 | #[cfg(any(feature = "api", test))] 34 | pub mod api; 35 | 36 | #[cfg(any(feature = "engine", test))] 37 | pub mod engine; 38 | 39 | #[cfg(any(feature = "engine", test))] 40 | pub mod cfg; 41 | 42 | #[cfg(any(feature = "std", test))] 43 | pub mod print; 44 | -------------------------------------------------------------------------------- /lib/opte/src/engine/icmp/mod.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! Internet Control Message Protocol (ICMP) shared data structures.
8 | 9 | pub mod v4; 10 | pub mod v6; 11 | 12 | use crate::engine::predicate::DataPredicate; 13 | use crate::engine::predicate::EtherAddrMatch; 14 | use crate::engine::predicate::IpProtoMatch; 15 | use crate::engine::predicate::Predicate; 16 | use crate::engine::rule::AllowOrDeny; 17 | use crate::engine::rule::GenErr; 18 | use crate::engine::rule::GenPacketResult; 19 | use crate::engine::rule::HairpinAction; 20 | use alloc::vec::Vec; 21 | use core::fmt; 22 | use core::fmt::Display; 23 | use ingot::Ingot; 24 | use ingot::types::primitives::u16be; 25 | pub use opte_api::ip::Protocol; 26 | use serde::Deserialize; 27 | use serde::Serialize; 28 | use smoltcp::phy::Checksum; 29 | use smoltcp::phy::ChecksumCapabilities as Csum; 30 | use zerocopy::ByteSlice; 31 | 32 | /// Shared methods for handling ICMPv4/v6 Echo fields. 33 | pub trait QueryEcho { 34 | /// Extract an ID from the body of an ICMP(v6) packet. 35 | /// 36 | /// This method should return `None` for any non-echo packets. 37 | fn echo_id(&self) -> Option<u16>; 38 | } 39 | 40 | /// Internal structure of an ICMP(v6) Echo(Reply)'s rest_of_header. 41 | #[derive(Clone, Debug, Eq, Hash, PartialEq, Ingot)] 42 | #[ingot(impl_default)] 43 | pub struct IcmpEcho { 44 | pub id: u16be, 45 | pub sequence: u16be, 46 | } 47 | -------------------------------------------------------------------------------- /lib/oxide-vpc/src/engine/gateway/arp.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2023 Oxide Computer Company 6 | 7 | //! The ARP implementation of the Virtual Gateway.
8 | 9 | use crate::cfg::VpcCfg; 10 | use opte::api::Direction; 11 | use opte::api::MacAddr; 12 | use opte::api::OpteError; 13 | use opte::engine::ether::ETHER_TYPE_ARP; 14 | use opte::engine::layer::Layer; 15 | use opte::engine::predicate::EtherAddrMatch; 16 | use opte::engine::predicate::EtherTypeMatch; 17 | use opte::engine::predicate::Predicate; 18 | use opte::engine::rule::Action; 19 | use opte::engine::rule::Rule; 20 | 21 | pub fn setup(layer: &mut Layer, cfg: &VpcCfg) -> Result<(), OpteError> { 22 | // ================================================================ 23 | // Outbound ARP Request for Gateway, from Guest 24 | // 25 | // We need to respond to guest ARP requests so it may resolve the 26 | // gateway (OPTE) IP address. 27 | // ================================================================ 28 | let mut rule = Rule::new(1, Action::HandlePacket); 29 | rule.add_predicates(vec![ 30 | Predicate::InnerEtherType(vec![EtherTypeMatch::Exact(ETHER_TYPE_ARP)]), 31 | Predicate::InnerEtherDst(vec![EtherAddrMatch::Exact( 32 | MacAddr::BROADCAST, 33 | )]), 34 | Predicate::InnerEtherSrc(vec![EtherAddrMatch::Exact(cfg.guest_mac)]), 35 | ]); 36 | layer.add_rule(Direction::Out, rule.finalize()); 37 | 38 | Ok(()) 39 | } 40 | -------------------------------------------------------------------------------- /pkg/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PUBLISHER=helios-dev 4 | export COMMIT_COUNT=`git rev-list --count HEAD` 5 | export REPO=packages/repo 6 | 7 | set -e 8 | set -x 9 | 10 | ./clean.sh 11 | 12 | # create the proto area 13 | mkdir -p proto/kernel/drv/amd64 14 | mkdir -p proto/opt/oxide/opte/bin 15 | mkdir -p proto/usr/lib/devfsadm/linkmod 16 | cp ../target/release/opteadm proto/opt/oxide/opte/bin/ 17 | cp ../target/x86_64-unknown-unknown/release-lto/xde proto/kernel/drv/amd64 18 | cp ../xde/xde.conf proto/kernel/drv/ 19 | cp ../target/i686-unknown-illumos/release/libxde_link.so 
proto/usr/lib/devfsadm/linkmod/SUNW_xde_link.so 20 | 21 | if [ -z ${RELEASE_ONLY+x} ]; then 22 | cp ../target/debug/opteadm proto/opt/oxide/opte/bin/opteadm.dbg 23 | cp ../target/x86_64-unknown-unknown/debug/xde.dbg proto/kernel/drv/amd64/xde.dbg 24 | INC_DEBUG="" 25 | else 26 | INC_DEBUG="#" 27 | fi 28 | 29 | API_VSN=$(./print-api-version.sh) 30 | 31 | # create the package 32 | sed -e "s/%PUBLISHER%/$PUBLISHER/g" \ 33 | -e "s/%COMMIT_COUNT%/$COMMIT_COUNT/g" \ 34 | -e "s/%API_VSN%/$API_VSN/g" \ 35 | opte.template.p5m | pkgmogrify -v -D inc_debug="$INC_DEBUG" -O opte.base.p5m 36 | 37 | pkgdepend generate -d proto opte.base.p5m > opte.generate.p5m 38 | 39 | mkdir -p packages 40 | pkgdepend resolve -d packages -s resolve.p5m opte.generate.p5m 41 | 42 | cat opte.base.p5m packages/opte.generate.p5m.resolve.p5m > opte.final.p5m 43 | 44 | pkgrepo create $REPO 45 | pkgrepo add-publisher -s $REPO $PUBLISHER 46 | 47 | pkgsend publish -d proto -s $REPO opte.final.p5m 48 | pkgrecv -a -d packages/repo/opte-0.$API_VSN.$COMMIT_COUNT.p5p -s $REPO \ 49 | -v -m latest '*' 50 | -------------------------------------------------------------------------------- /lib/oxide-vpc/src/engine/gateway/dhcpv6.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2023 Oxide Computer Company 6 | 7 | //! The DHCPv6 implementation of the Virtual Gateway. 
8 | 9 | use crate::cfg::VpcCfg; 10 | use alloc::sync::Arc; 11 | use opte::api::DhcpCfg; 12 | use opte::api::Direction; 13 | use opte::api::OpteError; 14 | use opte::engine::dhcpv6::AddressInfo; 15 | use opte::engine::dhcpv6::Dhcpv6Action; 16 | use opte::engine::dhcpv6::LeasedAddress; 17 | use opte::engine::layer::Layer; 18 | use opte::engine::rule::Action; 19 | use opte::engine::rule::Rule; 20 | 21 | pub fn setup( 22 | layer: &mut Layer, 23 | cfg: &VpcCfg, 24 | dhcp_cfg: DhcpCfg, 25 | ) -> Result<(), OpteError> { 26 | let ip_cfg = match cfg.ipv6_cfg() { 27 | None => return Ok(()), 28 | Some(ip_cfg) => ip_cfg, 29 | }; 30 | 31 | // The main DHCPv6 server action, which currently just leases the 32 | // VPC-private IP addresses to the client. 33 | let addrs = AddressInfo { 34 | addrs: vec![LeasedAddress::infinite_lease(ip_cfg.private_ip)], 35 | renew: u32::MAX, 36 | }; 37 | let action = Dhcpv6Action { 38 | client_mac: cfg.guest_mac, 39 | server_mac: cfg.gateway_mac, 40 | addrs, 41 | sntp_servers: vec![], 42 | dhcp_cfg, 43 | }; 44 | 45 | let server = Action::Hairpin(Arc::new(action)); 46 | let rule = Rule::new(1, server); 47 | layer.add_rule(Direction::Out, rule.finalize()); 48 | Ok(()) 49 | } 50 | -------------------------------------------------------------------------------- /pkg/opte.template.p5m: -------------------------------------------------------------------------------- 1 | default variant.opensolaris.zone global> 2 | set name=pkg.fmri \ 3 | value=pkg://%PUBLISHER%/driver/network/opte@0.%API_VSN%.%COMMIT_COUNT% 4 | set name=pkg.summary value="The Oxide Packet Transformation Engine" 5 | set name=info.classification \ 6 | value=org.opensolaris.category.2008:Drivers/Networking 7 | set name=variant.opensolaris.zone value=global value=nonglobal 8 | set name=variant.arch value=i386 9 | set name=variant.debug.illumos value=false value=true 10 | dir path=opt owner=root group=sys mode=0755 11 | dir path=opt/oxide owner=root group=bin mode=0755 12 | dir 
path=opt/oxide/opte owner=root group=bin mode=0755 13 | dir path=opt/oxide/opte/bin owner=root group=bin mode=0755 14 | file path=opt/oxide/opte/bin/opteadm owner=root group=bin mode=0755 \ 15 | variant.debug.illumos=false 16 | $(inc_debug)file opt/oxide/opte/bin/opteadm.dbg path=opt/oxide/opte/bin/opteadm owner=root group=bin mode=0755 \ 17 | variant.debug.illumos=true 18 | dir path=kernel owner=root group=sys mode=0755 19 | dir path=kernel/drv owner=root group=sys mode=0755 20 | file path=kernel/drv/xde.conf owner=root group=sys mode=0644 preserve=renamenew 21 | dir path=usr/lib/devfsadm owner=root group=sys mode=0755 22 | dir path=usr/lib/devfsadm/linkmod owner=root group=sys mode=0755 23 | file path=usr/lib/devfsadm/linkmod/SUNW_xde_link.so owner=root group=sys mode=0755 24 | 25 | file path=kernel/drv/amd64/xde owner=root group=sys mode=0755 \ 26 | variant.debug.illumos=false \ 27 | variant.opensolaris.zone=global 28 | $(inc_debug)file kernel/drv/amd64/xde.dbg path=kernel/drv/amd64/xde owner=root group=sys mode=0755 \ 29 | variant.debug.illumos=true \ 30 | variant.opensolaris.zone=global 31 | 32 | driver name=xde 33 | -------------------------------------------------------------------------------- /xde/src/sys.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2022 Oxide Computer Company 6 | 7 | // stuff we need from common/sys 8 | 9 | use crate::ip::processorid_t; 10 | 11 | pub const VLAN_TAGSZ: u32 = 4; 12 | pub const ALL_ZONES: i32 = -1; 13 | 14 | unsafe extern "C" { 15 | safe fn curcpup() -> *mut crate::ip::cpu; 16 | } 17 | 18 | /// Return the current number of CPUs reported by illumos. 
19 | #[inline] 20 | pub fn ncpus() -> usize { 21 | usize::try_from(unsafe { crate::ip::ncpus }) 22 | .expect("CPU count is non-negative, and usize is >=32b") 23 | } 24 | 25 | /// Return information on the currently executing CPU. 26 | #[inline] 27 | pub fn current_cpu() -> CurrentCpu { 28 | // struct cpu contains a lot more than this, but these are the only 29 | // fields we need. 30 | #[repr(C)] 31 | #[derive(Copy, Clone)] 32 | struct partial_cpu { 33 | cpu_id: processorid_t, 34 | cpu_seqid: processorid_t, 35 | } 36 | 37 | let cpu = unsafe { 38 | let cpu_ptr = curcpup() as *mut partial_cpu; 39 | *cpu_ptr 40 | }; 41 | 42 | CurrentCpu { 43 | id: usize::try_from(cpu.cpu_id) 44 | .expect("CPU count is non-negative, and usize is >=32b"), 45 | seq_id: usize::try_from(cpu.cpu_seqid) 46 | .expect("CPU count is non-negative, and usize is >=32b"), 47 | } 48 | } 49 | 50 | /// Information about the currently executing CPU. 51 | pub struct CurrentCpu { 52 | /// ID of the current CPU. 53 | pub id: usize, 54 | 55 | /// Index of the current CPU in the range `0..ncpus()`. 56 | pub seq_id: usize, 57 | } 58 | -------------------------------------------------------------------------------- /dtrace/opte-rule-match.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track rule match/no-match as it happens. 3 | * 4 | * dtrace -L ./lib -I . 
-Cqs ./opte-rule-match.d 5 | */ 6 | #include "common.h" 7 | #include "protos.d" 8 | 9 | #define HDR_FMT "%-8s %-12s %-6s %-3s %-43s %s\n" 10 | 11 | BEGIN { 12 | printf(HDR_FMT, "PORT", "LAYER", "MATCH", "DIR", "FLOW", "ACTION"); 13 | num = 0; 14 | } 15 | 16 | rule-match { 17 | this->match = (rule_match_sdt_arg_t *)arg0; 18 | this->port = stringof(this->match->port); 19 | this->layer = stringof(this->match->layer); 20 | this->flow = this->match->flow; 21 | this->dir = DIR_STR(this->match->dir); 22 | this->af = this->flow->af; 23 | num++; 24 | 25 | if (num >= 10) { 26 | printf(HDR_FMT, "PORT", "LAYER", "MATCH", "DIR", "FLOW", 27 | "ACTION"); 28 | num = 0; 29 | } 30 | } 31 | 32 | rule-match /this->af == AF_INET/ { 33 | FLOW_FMT(this->s, this->flow); 34 | printf(HDR_FMT, this->port, this->layer, "YES", this->dir, this->s, 35 | stringof(this->match->rule_type)); 36 | } 37 | 38 | rule-match /this->af == AF_INET6/ { 39 | FLOW_FMT6(this->s, this->flow); 40 | printf(HDR_FMT, this->port, this->layer, "YES", this->dir, this->s, 41 | stringof(this->match->rule_type)); 42 | } 43 | 44 | rule-no-match { 45 | this->no_match = (rule_no_match_sdt_arg_t *)arg0; 46 | /* this->port is otherwise only assigned in the rule-match clause; read it from this probe's own argument so the "NO" rows print the right port. */ 47 | this->port = stringof(this->no_match->port); 48 | this->flow = this->no_match->flow; 49 | this->dir = DIR_STR(this->no_match->dir); 50 | this->layer = stringof(this->no_match->layer); 51 | this->af = this->flow->af; 52 | num++; 53 | } 54 | 55 | rule-no-match /this->af == AF_INET/ { 56 | FLOW_FMT(this->s, this->flow); 57 | printf(HDR_FMT, this->port, this->layer, "NO", this->dir, this->s, 58 | "--"); 59 | } 60 | 61 | rule-no-match /this->af == AF_INET6/ { 62 | FLOW_FMT6(this->s, this->flow); 63 | printf(HDR_FMT, this->port, this->layer, "NO", this->dir, this->s, 64 | "--"); 65 | } 66 | -------------------------------------------------------------------------------- /lib/opte/src/engine/tcp.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0.
If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | //! TCP headers. 8 | 9 | use super::flow_table::Ttl; 10 | use serde::Deserialize; 11 | use serde::Serialize; 12 | 13 | pub const TCP_HDR_OFFSET_MASK: u8 = 0xF0; 14 | pub const TCP_HDR_OFFSET_SHIFT: u8 = 4; 15 | 16 | pub const TCP_PORT_RDP: u16 = 3389; 17 | pub const TCP_PORT_SSH: u16 = 22; 18 | 19 | /// The duration after which a connection in TIME-WAIT should be 20 | /// considered free for either side to reuse. 21 | /// 22 | /// This value is chosen by Windows and MacOS, which is larger 23 | /// than Linux's default 60s. Allowances for tuned servers and/or 24 | /// more aggressive reuse via RFCs 1323/7323 and/or 6191 are made in 25 | /// `tcp_state`. 26 | pub const TIME_WAIT_EXPIRE_SECS: u64 = 120; 27 | /// The duration after which otherwise healthy TCP flows should be pruned. 28 | /// 29 | /// Currently, this is tuned to be roughly 2.2 hours (8,000 seconds): higher than the default behaviour 30 | /// for SO_KEEPALIVE on linux/illumos. Each will wait 2 hours before sending a 31 | /// keepalive, when interval + probe count will result in a timeout after 32 | /// 8mins (illumos) / 11mins (linux).
33 | pub const KEEPALIVE_EXPIRE_SECS: u64 = 8_000; 34 | pub const TIME_WAIT_EXPIRE_TTL: Ttl = Ttl::new_seconds(TIME_WAIT_EXPIRE_SECS); 35 | pub const KEEPALIVE_EXPIRE_TTL: Ttl = Ttl::new_seconds(KEEPALIVE_EXPIRE_SECS); 36 | 37 | #[derive( 38 | Clone, 39 | Copy, 40 | Debug, 41 | Default, 42 | Deserialize, 43 | Eq, 44 | Ord, 45 | PartialEq, 46 | PartialOrd, 47 | Serialize, 48 | )] 49 | pub struct TcpPush { 50 | pub src: u16, 51 | pub dst: u16, 52 | } 53 | 54 | #[derive(Clone, Debug, Deserialize, Serialize)] 55 | pub struct TcpMod { 56 | src: Option<u16>, 57 | dst: Option<u16>, 58 | } 59 | -------------------------------------------------------------------------------- /dtrace/opte-layer-process.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track a flow as it is processed by different layers. This only 3 | * applies to flows without a current UFT entry. 4 | * 5 | * XXX Teach ARP to SDT probe + this script. 6 | * 7 | * dtrace -L ./lib -I . -Cqs ./opte-layer-process.d 8 | */ 9 | #include "common.h" 10 | #include "protos.d" 11 | 12 | #define HDR_FMT "%-16s %-16s %-3s %-48s %-48s %s\n" 13 | 14 | BEGIN { 15 | printf(HDR_FMT, "PORT", "LAYER", "DIR", "FLOW BEFORE", "FLOW AFTER", 16 | "RES"); 17 | num = 0; 18 | } 19 | 20 | layer-process-return { 21 | this->dir = DIR_STR(arg0); 22 | this->port = stringof(arg1); 23 | this->layer = stringof(arg2); 24 | this->flow_before = (flow_id_sdt_arg_t *)arg3; 25 | this->flow_after = (flow_id_sdt_arg_t *)arg4; 26 | this->msgs = (derror_sdt_arg_t*) arg5; 27 | this->msg_len = this->msgs->len; 28 | this->res = stringof(""); 29 | 30 | if (num >= 10) { 31 | printf(HDR_FMT, "PORT", "LAYER", "DIR", "FLOW BEFORE", 32 | "FLOW AFTER", "RES"); 33 | num = 0; 34 | } 35 | 36 | this->af = this->flow_before->af; 37 | 38 | if (this->af != AF_INET && this->af != AF_INET6) { 39 | printf("BAD ADDRESS FAMILY: %d\n", this->af); 40 | } 41 | } 42 | 43 | layer-process-return 44 | /this->msg_len > 0/ 45 | { 46 | this->res =
strjoin(this->res, stringof(this->msgs->entry[0])); 47 | } 48 | 49 | layer-process-return 50 | /this->msg_len > 1/ 51 | { 52 | this->res = strjoin(this->res, EL_DELIMIT); 53 | this->res = strjoin(this->res, stringof(this->msgs->entry[1])); 54 | } 55 | 56 | layer-process-return /this->af == AF_INET/ { 57 | FLOW_FMT(this->s_before, this->flow_before); 58 | FLOW_FMT(this->s_after, this->flow_after); 59 | printf(HDR_FMT, this->port, this->layer, this->dir, this->s_before, 60 | this->s_after, this->res); 61 | num++; 62 | } 63 | 64 | layer-process-return /this->af == AF_INET6/ { 65 | FLOW_FMT6(this->s_before, this->flow_before); 66 | FLOW_FMT6(this->s_after, this->flow_after); 67 | printf(HDR_FMT, this->port, this->layer, this->dir, this->s_before, 68 | this->s_after, this->res); 69 | num++; 70 | } 71 | -------------------------------------------------------------------------------- /lib/opte/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "opte" 3 | version = "0.1.0" 4 | 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | 9 | [features] 10 | default = ["api", "std"] 11 | api = ["dep:zerocopy"] 12 | engine = [ 13 | "api", 14 | "dep:cfg-if", 15 | "dep:crc32fast", 16 | "dep:derror-macro", 17 | "dep:heapless", 18 | "dep:itertools", 19 | "dep:smoltcp", 20 | "dep:zerocopy" 21 | ] 22 | kernel = ["illumos-sys-hdrs/kernel", "dep:cfg-if"] 23 | # This feature indicates that OPTE is being built with std. This is 24 | # mostly useful to consumers of the API, providing convenient methods 25 | # for working with the API types in a std context. 26 | std = ["dep:tabwriter", "opte-api/std"] 27 | # 28 | # Used for declaring methods which are useful for integration testing. 
29 | # 30 | test-help = [] 31 | usdt = ["std", "dep:usdt"] 32 | 33 | [dependencies] 34 | derror-macro = { workspace = true, optional = true } 35 | illumos-sys-hdrs.workspace = true 36 | kstat-macro.workspace = true 37 | opte-api.workspace = true 38 | 39 | ingot.workspace = true 40 | 41 | bitflags = { workspace = true , features = ["serde"] } 42 | cfg-if = { workspace = true, optional = true } 43 | crc32fast = { workspace = true, optional = true } 44 | dyn-clone.workspace = true 45 | heapless = { workspace = true, optional = true } 46 | itertools = { workspace = true, optional = true } 47 | postcard.workspace = true 48 | ref-cast.workspace = true 49 | serde.workspace = true 50 | tabwriter = { workspace = true, optional = true } 51 | usdt = { workspace = true, optional = true } 52 | zerocopy = { workspace = true, optional = true } 53 | 54 | [dependencies.smoltcp] 55 | workspace = true 56 | optional = true 57 | default-features = false 58 | # 59 | # TODO Would defmt be of any use? 60 | # 61 | features = ["alloc", "medium-ethernet", "proto-ipv4", "proto-ipv6", "proto-dhcpv4", "socket", "socket-raw"] 62 | 63 | [build-dependencies] 64 | version_check.workspace = true 65 | 66 | [dev-dependencies] 67 | # Enable usdt probes for testing. 68 | opte = { workspace = true, features = ["engine", "usdt"] } 69 | itertools = { workspace = true, features = ["use_std"] } 70 | -------------------------------------------------------------------------------- /crates/opte-api/src/dhcpv6.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2023 Oxide Computer Company 6 | 7 | //! 
/// A single leased IPv6 address, with associated lifetime.
///
/// Both lifetimes are private, so code outside this module can only obtain
/// a value through [`LeasedAddress::infinite_lease`] or
/// [`LeasedAddress::new`]; the latter rejects `preferred > valid`.
#[derive(Clone, Copy, Debug)]
pub struct LeasedAddress {
    /// The leased address.
    pub addr: Ipv6Addr,

    // The preferred lifetime for this address, in seconds.
    preferred: u32,

    // The maximum valid lifetime for this address, in seconds.
    valid: u32,
}

impl LeasedAddress {
    /// Construct an address lease with infinite lifetime.
    ///
    /// Both the preferred and the valid lifetime are set to `u32::MAX`.
    pub fn infinite_lease(addr: Ipv6Addr) -> Self {
        Self { addr, preferred: u32::MAX, valid: u32::MAX }
    }

    /// Construct a new leased address with checked lifetimes, in seconds.
    ///
    /// The preferred lifetime must be no longer than the valid lifetime;
    /// equal lifetimes are accepted.
    ///
    /// # Errors
    ///
    /// Returns a descriptive message when `valid < preferred`.
    pub fn new(
        addr: Ipv6Addr,
        preferred: u32,
        valid: u32,
    ) -> Result<Self, String> {
        if valid < preferred {
            return Err(String::from(
                "Preferred lifetime must be <= valid lifetime",
            ));
        }
        Ok(Self { addr, preferred, valid })
    }

    /// Return the valid lifetime, in seconds.
    pub fn valid(&self) -> u32 {
        self.valid
    }

    /// Return the preferred lifetime, in seconds.
    pub fn preferred(&self) -> u32 {
        self.preferred
    }
}

/// Information about IPv6 addresses leased by OPTE.
#[derive(Clone, Debug)]
pub struct AddressInfo {
    /// The set of addresses OPTE will lease.
    pub addrs: Vec<LeasedAddress>,
    /// The time (in seconds) after which the client should renew the lease.
    ///
    /// NOTE: This is used as both T1 and T2 in a Non-Temporary Address
    /// Assignment.
    pub renew: u32,
}
printf(LINE_FMT, this->name, this->dir, this->epoch, this->s_before, 62 | this->s_after, msgsize(this->mp), this->res, this->path); 63 | num++; 64 | } 65 | 66 | port-process-return /this->af == AF_INET6/ { 67 | FLOW_FMT6(this->s_before, this->flow_before); 68 | FLOW_FMT6(this->s_after, this->flow_after); 69 | printf(LINE_FMT, this->name, this->dir, this->epoch, this->s_before, 70 | this->s_after, msgsize(this->mp), this->res, this->path); 71 | num++; 72 | } 73 | 74 | -------------------------------------------------------------------------------- /lib/opte/src/dynamic.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | //! A KRwLock-based wrapper for dynamically updateable resources (e.g., config), 8 | //! and for memoizing the outputs generated from those resources. 9 | 10 | // TODO: may want to look into porting arc-swap for alloc and core, 11 | // which should allow us to do better than a mutex. 12 | 13 | // TODO: Implement the generated outputs to reduce cost of, e.g., DHCP responses. 
14 | 15 | use crate::ddi::sync::KRwLock; 16 | use alloc::sync::Arc; 17 | use core::fmt::Debug; 18 | use core::ops::Deref; 19 | use core::sync::atomic::AtomicU64; 20 | use core::sync::atomic::Ordering; 21 | use core::write; 22 | 23 | #[derive(Clone)] 24 | pub struct Dynamic(Arc>); 25 | 26 | struct InnerDynamic { 27 | inner: KRwLock>, 28 | epoch: AtomicU64, 29 | } 30 | 31 | #[derive(Debug)] 32 | pub struct Snapshot { 33 | pub value: Arc, 34 | pub epoch: u64, 35 | } 36 | 37 | impl From for Dynamic { 38 | fn from(value: T) -> Self { 39 | let inner = KRwLock::new(value.into()); 40 | 41 | Self(InnerDynamic { inner, epoch: AtomicU64::default() }.into()) 42 | } 43 | } 44 | 45 | impl Dynamic { 46 | pub fn store(&self, value: T) { 47 | let mut inner = self.0.inner.write(); 48 | *inner = value.into(); 49 | _ = self.0.epoch.fetch_add(1, Ordering::Relaxed); 50 | } 51 | 52 | pub fn load(&self) -> Snapshot { 53 | let value_locked = self.0.inner.read(); 54 | let value = Arc::clone(&*value_locked); 55 | let epoch = self.0.epoch.load(Ordering::Relaxed); 56 | 57 | Snapshot { epoch, value } 58 | } 59 | } 60 | 61 | impl Debug for Dynamic { 62 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 63 | let current_val = self.load(); 64 | write!(f, "{current_val:?}") 65 | } 66 | } 67 | 68 | impl Deref for Snapshot { 69 | type Target = T; 70 | 71 | fn deref(&self) -> &Self::Target { 72 | &self.value 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /CONTRIBUTING.adoc: -------------------------------------------------------------------------------- 1 | == OPTE status and open source 2 | 3 | The OPTE repo is public because: 4 | 5 | 1. It has always been our intention to make this open-source. 6 | 7 | 2. We figured it's best to err on the side of doing this early instead 8 | of late. 9 | 10 | However, OPTE is not finished, or even ready. 
11 | 12 | And so, we thought it was important to explain where we're currently 13 | at, and manage your expectations. 14 | 15 | - We are a small company. 16 | 17 | - Our current goal is to get our first generation products finished 18 | and in customers' hands. 19 | 20 | - We're writing OPTE in support of that goal, not as its own thing. 21 | We're all working on the products, and tool development is a side 22 | effect. That said, OPTE will be more generally useful if you are 23 | looking for a packet transformation engine to implement network 24 | functions in an illumos-based environment; it's just that the current 25 | focus is solely on supporting Oxide and the Oxide Rack. 26 | 27 | - OPTE may have dependencies on other Oxide repositories, which 28 | themselves may be undergoing similar churn. 29 | 30 | - These points together mean that we may not have enough bandwidth to 31 | review and integrate outside PRs right now. We hope this will change 32 | in the future. 33 | 34 | You're welcome to send PRs, but we want to set expectations right: if 35 | we have time, or if the PRs are very small or fix bugs, we may 36 | integrate them in the near future. But we might also not get to any PR 37 | for a while, by which time it might no longer be relevant. Also keep 38 | in mind that some aspects of the code which have obvious flaws or 39 | TODOs may already be scheduled for change, but there are other more 40 | pressing matters to attend to first. If you feel compelled to write a 41 | PR, it would be best to first reach out before starting any work in 42 | earnest, as there may already be planned changes that would obsolete 43 | such work. 44 | 45 | We've all dealt with those open source projects that feel open in name 46 | only, and have big patches and history-free source drops appearing 47 | from behind the walls of some large organization. We don't like that, 48 | and we're not going to do that. But it will take some time for us to 49 | scale up -- please bear with us.
50 | 51 | Thanks! 52 | -------------------------------------------------------------------------------- /dtrace/opte-bad-packet.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track bad packets as they happen. 3 | * 4 | * dtrace -L ./lib -I . -Cqs ./opte-bad-packet.d 5 | */ 6 | #include "common.h" 7 | 8 | #define HDR_FMT "%-12s %-3s %-18s %s\n" 9 | #define LINE_FMT "%-12s %-3s 0x%-16p %s[%d, %d]\n" 10 | 11 | BEGIN { 12 | printf(HDR_FMT, "PORT", "DIR", "MBLK", "MSG+DATA"); 13 | num = 0; 14 | } 15 | 16 | bad-packet { 17 | this->port = stringof(arg0); 18 | this->dir = DIR_STR(arg1); 19 | this->mblk = arg2; 20 | this->msgs = (derror_sdt_arg_t*) arg3; 21 | this->msg_len = this->msgs->len; 22 | this->data_len = arg4; 23 | this->res = stringof(""); 24 | 25 | if (num >= 10) { 26 | printf(HDR_FMT, "PORT", "DIR", "MBLK", "MSG+DATA"); 27 | num = 0; 28 | } 29 | 30 | num++; 31 | } 32 | 33 | /* We can probably roll this out with clever macro use. */ 34 | 35 | bad-packet 36 | /this->msg_len > 0/ 37 | { 38 | this->res = strjoin(this->res, stringof(this->msgs->entry[0])); 39 | } 40 | 41 | bad-packet 42 | /this->msg_len > 1/ 43 | { 44 | this->res = strjoin(this->res, EL_DELIMIT); 45 | this->res = strjoin(this->res, stringof(this->msgs->entry[1])); 46 | } 47 | 48 | bad-packet 49 | /this->msg_len > 2/ 50 | { 51 | this->res = strjoin(this->res, EL_DELIMIT); 52 | this->res = strjoin(this->res, stringof(this->msgs->entry[2])); 53 | } 54 | 55 | bad-packet 56 | /this->msg_len > 3/ 57 | { 58 | this->res = strjoin(this->res, EL_DELIMIT); 59 | this->res = strjoin(this->res, stringof(this->msgs->entry[3])); 60 | } 61 | 62 | bad-packet 63 | /this->msg_len > 4/ 64 | { 65 | this->res = strjoin(this->res, EL_DELIMIT); 66 | this->res = strjoin(this->res, stringof(this->msgs->entry[4])); 67 | } 68 | 69 | bad-packet 70 | /this->msg_len > 5/ 71 | { 72 | this->res = strjoin(this->res, EL_DELIMIT); 73 | this->res = strjoin(this->res, 
stringof(this->msgs->entry[5])); 74 | } 75 | 76 | bad-packet 77 | /this->msg_len > 6/ 78 | { 79 | this->res = strjoin(this->res, EL_DELIMIT); 80 | this->res = strjoin(this->res, stringof(this->msgs->entry[6])); 81 | } 82 | 83 | bad-packet 84 | /this->msg_len > 7/ 85 | { 86 | this->res = strjoin(this->res, EL_DELIMIT); 87 | this->res = strjoin(this->res, stringof(this->msgs->entry[7])); 88 | } 89 | 90 | bad-packet { 91 | printf(LINE_FMT, 92 | this->port, this->dir, this->mblk, 93 | this->res, this->msgs->data[0], this->msgs->data[1] 94 | ); 95 | } 96 | -------------------------------------------------------------------------------- /dtrace/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * MOVING RIGHT ALONG... 3 | * 4 | * inet_ntoa() wants an ipaddr_t pointer, but opte is passing 5 | * up the actual 32-bit IP value. You can't take the address 6 | * of a dynamic variable, so make local allocations to 7 | * reference. 8 | */ 9 | #define FLOW_FMT(svar, fvar) \ 10 | this->src_ip = (ipaddr_t *)alloca(4); \ 11 | this->dst_ip = (ipaddr_t *)alloca(4); \ 12 | *this->src_ip = fvar->addrs.ip4.src; \ 13 | *this->dst_ip = fvar->addrs.ip4.dst; \ 14 | svar = protos[fvar->proto]; \ 15 | svar = strjoin(svar, ","); \ 16 | svar = strjoin(svar, inet_ntoa(this->src_ip)); \ 17 | svar = strjoin(svar, ":"); \ 18 | svar = strjoin(svar, lltostr(fvar->src_port)); \ 19 | svar = strjoin(svar, ","); \ 20 | svar = strjoin(svar, inet_ntoa(this->dst_ip)); \ 21 | svar = strjoin(svar, ":"); \ 22 | svar = strjoin(svar, lltostr(fvar->dst_port)); 23 | 24 | #define FLOW_FMT6(svar, fvar) \ 25 | this->src_ip6 = (in6_addr_t *)alloca(16); \ 26 | this->dst_ip6 = (in6_addr_t *)alloca(16); \ 27 | *this->src_ip6 = fvar->addrs.ip6.src; \ 28 | *this->dst_ip6 = fvar->addrs.ip6.dst; \ 29 | svar = protos[fvar->proto]; \ 30 | svar = strjoin(svar, ",["); \ 31 | svar = strjoin(svar, inet_ntoa6(this->src_ip6)); \ 32 | svar = strjoin(svar, "]:"); \ 33 | svar = 
strjoin(svar, lltostr(fvar->src_port)); \ 34 | svar = strjoin(svar, ",["); \ 35 | svar = strjoin(svar, inet_ntoa6(this->dst_ip6)); \ 36 | svar = strjoin(svar, "]:"); \ 37 | svar = strjoin(svar, lltostr(fvar->dst_port)); 38 | 39 | #define ETH_FMT(svar, evar) \ 40 | svar = substr(lltostr(evar[0], 16), 2); \ 41 | svar = strjoin(svar, ":"); \ 42 | svar = strjoin(svar, substr(lltostr(evar[1], 16), 2)); \ 43 | svar = strjoin(svar, ":"); \ 44 | svar = strjoin(svar, substr(lltostr(evar[2], 16), 2)); \ 45 | svar = strjoin(svar, ":"); \ 46 | svar = strjoin(svar, substr(lltostr(evar[3], 16), 2)); \ 47 | svar = strjoin(svar, ":"); \ 48 | svar = strjoin(svar, substr(lltostr(evar[4], 16), 2)); \ 49 | svar = strjoin(svar, ":"); \ 50 | svar = strjoin(svar, substr(lltostr(evar[5], 16), 2)); 51 | 52 | /* Direction 53 | * 54 | * 1 = Inbound 55 | * 2 = Outbound 56 | */ 57 | #define DIR_STR(dir) ((dir) == 1 ? "IN" : "OUT") 58 | 59 | /* 60 | * Packet processing path. 61 | * 1 = UFT Compiled/Fast 62 | * 2 = UFT Hit/Medium 63 | * 3 = UFT Miss/Slow 64 | */ 65 | #define PATH_STR(path) ((path) == 1 ? "FAST" : ((path) == 2 ? "MED" : "SLOW")) 66 | 67 | #define EL_DELIMIT "->" 68 | #define EL_FMT "->%s" 69 | -------------------------------------------------------------------------------- /lib/opte-test-utils/src/pcap.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! Routines for building packet capture files. 
8 | 9 | use opte::ddi::mblk::MsgBlk; 10 | use pcap_parser::Linktype; 11 | use pcap_parser::ToVec; 12 | use pcap_parser::pcap; 13 | use pcap_parser::pcap::LegacyPcapBlock; 14 | use pcap_parser::pcap::PcapHeader; 15 | use std::fs::File; 16 | use std::io::Write; 17 | 18 | #[allow(dead_code)] 19 | fn get_header(offset: &[u8]) -> (&[u8], PcapHeader) { 20 | match pcap::parse_pcap_header(offset) { 21 | Ok((new_offset, header)) => (new_offset, header), 22 | Err(e) => panic!("failed to get header: {e:?}"), 23 | } 24 | } 25 | 26 | #[allow(dead_code)] 27 | fn next_block(offset: &[u8]) -> (&[u8], LegacyPcapBlock<'_>) { 28 | match pcap::parse_pcap_frame(offset) { 29 | Ok((new_offset, block)) => { 30 | // We always want access to the entire packet. 31 | assert_eq!(block.origlen, block.caplen); 32 | (new_offset, block) 33 | } 34 | 35 | Err(e) => panic!("failed to get next block: {e:?}"), 36 | } 37 | } 38 | 39 | /// Build a packet capture file from a series of packets. 40 | pub struct PcapBuilder { 41 | file: File, 42 | } 43 | 44 | impl PcapBuilder { 45 | /// Create a new pcap builder, writing all captures to `path`. 46 | pub fn new(path: &str) -> Self { 47 | let mut file = File::create(path).unwrap(); 48 | 49 | let mut hdr = PcapHeader { 50 | magic_number: 0xa1b2c3d4, 51 | version_major: 2, 52 | version_minor: 4, 53 | thiszone: 0, 54 | sigfigs: 0, 55 | snaplen: 1500, 56 | network: Linktype::ETHERNET, 57 | }; 58 | 59 | file.write_all(&hdr.to_vec().unwrap()).unwrap(); 60 | 61 | Self { file } 62 | } 63 | 64 | /// Add a packet to the capture. 
65 | pub fn add_pkt(&mut self, pkt: &MsgBlk) { 66 | let pkt_bytes = pkt.copy_all(); 67 | let mut block = LegacyPcapBlock { 68 | ts_sec: 7777, 69 | ts_usec: 7777, 70 | caplen: pkt_bytes.len() as u32, 71 | origlen: pkt_bytes.len() as u32, 72 | data: &pkt_bytes, 73 | }; 74 | 75 | self.file.write_all(&block.to_vec().unwrap()).unwrap(); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /bench/Gimlet.adoc: -------------------------------------------------------------------------------- 1 | = Running OPTE Benchmarks on a Gimlet (pair) 2 | 3 | The kernel module benchmarks can be run on a pair of lab gimlets connected over their `cxgbe` interfaces. 4 | Today, SN9/SN14 are set up in this configuration. 5 | 6 | == Image construction 7 | When https://github.com/oxidecomputer/helios[building a helios image], we need to include some extra packages and pull in the latest (host-installed) version of OPTE. 8 | After following instructions to setup Helios and build illumos, a Helios image suitable for running `kbench` can then be built using the command: 9 | 10 | [source, bash] 11 | ---- 12 | ./helios-build experiment-image \ 13 | -F extra_packages+=/network/test/iperf \ 14 | -F extra_packages+=/system/library/demangle \ 15 | -F extra_packages+=/ooce/developer/flamegraph \ 16 | -F optever=0.31.264 \ 17 | -B -N netperf 18 | ---- 19 | 20 | The version of opte installed here is unimportant, as we will often be using local unpublished binaries and modules for testing. 21 | This image may then be installed onto your bench gimlet(s) as standard. 
22 | 23 | == Running kbench on a bench gimlet 24 | 25 | Bench gimlets will not have cargo or rust installed, so typically we will need to locally compile these artifacts on a Helios/illumos machine and transfer these artifacts onto the gimlet ramdisk: 26 | 27 | - the `xde` kernel module -- `target/x86_64-unknown-unknown/release/xde` 28 | - `opteadm` -- `target/release/opteadm` 29 | - the `kbench` binary. 30 | 31 | All three artifacts require a consistent API version. 32 | 33 | The first two are stored in a static location, however see link:README.adoc#in-situ-measurement[the README entry on in-situ measurement] to acquire the current path for the third entry. 34 | This `cargo bench ... --message-format json-render-diagnostics` invocation will return a path such as: 35 | 36 | [source,json] 37 | ---- 38 | [ 39 | "/develop/gits/opte/target/release/deps/xde-5f922c3588d78a41" 40 | ] 41 | ---- 42 | 43 | === Installing and running artifacts 44 | 45 | Assuming all components have been `rsync`/`scp`'d into `/tmp`, we run on both machines: 46 | 47 | [source, bash] 48 | ---- 49 | cd /tmp 50 | rem_drv xde 51 | cp xde /kernel/drv/amd64/ 52 | ---- 53 | 54 | On SN9: 55 | 56 | [source, bash] 57 | ---- 58 | ./kbench server -u cxgbe0 cxgbe1 -b omicron1 59 | ---- 60 | 61 | On SN14: 62 | 63 | [source, bash] 64 | ---- 65 | ./kbench remote 172.20.2.109 -u cxgbe0 cxgbe1 -b omicron1 66 | ---- 67 | 68 | `opteadm` invocations for inspecting state or, e.g., manually configuring the underlay can then be made using `/tmp/opteadm [args]`. 69 | -------------------------------------------------------------------------------- /xde/xde-link/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | // Match linker flags used with other devfsadm plugins. 
3 | // 4 | // DYNFLAGS = $(HSONAME) $(ZTEXT) $(ZDEFS) $(BDIRECT) \ 5 | // $(MAPFILES:%=-Wl,-M%) $(MAPFILE.PGA:%=-Wl,-M%) $(MAPFILE.NED:%=-Wl,-M%) \ 6 | // $(LDCHECKS) 7 | // 8 | // See: 9 | // - usr/src/Makefile.master 10 | // - usr/src/lib/Makefile.lib 11 | // - usr/src/cmd/devfsadm/Makefile.com 12 | 13 | // $(HSONAME) 14 | println!("cargo:rustc-cdylib-link-arg=-Wl,-hSUNW_xde_link.so"); 15 | 16 | // $(ZTEXT) $(ZDEFS) $(BDIRECT) 17 | println!("cargo:rustc-cdylib-link-arg=-Wl,-ztext"); 18 | println!("cargo:rustc-cdylib-link-arg=-Wl,-zdefs"); 19 | println!("cargo:rustc-cdylib-link-arg=-Wl,-Bdirect"); 20 | 21 | // $(MAPFILES) 22 | // 23 | // We reference symbols that exist only within devfsadm itself and 24 | // can only be resolved at runtime. At link time though it remains 25 | // unresolved and -zdefs thus forces an error. We suppress the error 26 | // by explicitly telling the linker that these symbols are external 27 | // via a mapfile (map.devfsadm-externs). 28 | // 29 | // See usr/src/cmd/devfsadm/mapfile-vers 30 | println!( 31 | "cargo:rustc-cdylib-link-arg=-Wl,-M{}/map.devfsadm-externs", 32 | env!("CARGO_MANIFEST_DIR"), 33 | ); 34 | 35 | // $(MAPFILE.PGA) $(MAPFILE.NED) 36 | println!("cargo:rustc-cdylib-link-arg=-Wl,-M/usr/lib/ld/map.pagealign"); 37 | println!("cargo:rustc-cdylib-link-arg=-Wl,-M/usr/lib/ld/map.noexdata"); 38 | 39 | // LDCHECKS = $(ZASSERTDEFLIB) $(ZGUIDANCE) $(ZFATALWARNINGS) 40 | println!("cargo:rustc-cdylib-link-arg=-Wl,-zassert-deflib"); 41 | println!("cargo:rustc-cdylib-link-arg=-Wl,-zguidance"); 42 | println!("cargo:rustc-cdylib-link-arg=-Wl,-zfatal-warnings"); 43 | 44 | // We're linking against libc and libdevinfo and relying on the linker 45 | // finding them in /lib. 
Unfortunately, the linker will also complain 46 | // about this (and subsequently fail due to -zfatal-warnings): 47 | // 48 | // ld: warning: dynamic library found on default search path (/lib): libdevinfo.so 49 | // ld: warning: dynamic library found on default search path (/lib): libc.so 50 | // 51 | // The in-gate devfsadm plugins don't have this problem because they always 52 | // link against libs in the workspace proto area and not the default search path. 53 | // Just explicitly suppress the warning for these two libs. 54 | println!("cargo:rustc-cdylib-link-arg=-Wl,-zassert-deflib=libc.so"); 55 | println!("cargo:rustc-cdylib-link-arg=-Wl,-zassert-deflib=libdevinfo.so"); 56 | } 57 | -------------------------------------------------------------------------------- /crates/opte-api/src/lib.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | #![no_std] 8 | #![deny(unreachable_patterns)] 9 | #![deny(unused_must_use)] 10 | 11 | #[cfg(any(feature = "std", test))] 12 | #[macro_use] 13 | extern crate std; 14 | 15 | #[macro_use] 16 | extern crate alloc; 17 | 18 | use alloc::string::String; 19 | use core::fmt; 20 | use core::fmt::Display; 21 | use serde::Deserialize; 22 | use serde::Serialize; 23 | 24 | pub mod cmd; 25 | pub mod dhcpv6; 26 | pub mod dns; 27 | pub mod encap; 28 | pub mod ip; 29 | pub mod mac; 30 | pub mod ndp; 31 | pub mod tcp; 32 | pub mod ulp; 33 | 34 | pub use cmd::*; 35 | pub use dhcpv6::*; 36 | pub use dns::*; 37 | pub use encap::*; 38 | pub use ip::*; 39 | pub use mac::*; 40 | pub use ndp::*; 41 | pub use tcp::*; 42 | pub use ulp::*; 43 | 44 | /// The overall version of the API. 
45 | /// 46 | /// Anytime an API is added, removed, or modified, this number should 47 | /// increment. Currently we attach no semantic meaning to the number 48 | /// other than as a means to verify that the user and kernel are compiled 49 | /// for the same API. A u64 is used to give future wiggle room to play bit 50 | /// games if needed. 51 | /// 52 | /// We rely on CI and the check-api-version.sh script to verify that 53 | /// this number is incremented anytime the oxide-api code changes. 54 | pub const API_VERSION: u64 = 38; 55 | 56 | /// Major version of the OPTE package. 57 | pub const MAJOR_VERSION: u64 = 0; 58 | 59 | #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] 60 | pub enum Direction { 61 | In = 1, 62 | Out = 2, 63 | } 64 | 65 | impl core::str::FromStr for Direction { 66 | type Err = String; 67 | 68 | fn from_str(s: &str) -> core::result::Result { 69 | match s.to_ascii_lowercase().as_str() { 70 | "in" => Ok(Direction::In), 71 | "out" => Ok(Direction::Out), 72 | _ => Err(format!("invalid direction: {s}")), 73 | } 74 | } 75 | } 76 | 77 | impl Display for Direction { 78 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 79 | let dirstr = match self { 80 | Direction::In => "IN", 81 | Direction::Out => "OUT", 82 | }; 83 | 84 | write!(f, "{dirstr}") 85 | } 86 | } 87 | 88 | /// Opaque identifier for a rule within a layer. 89 | pub type RuleId = u64; 90 | 91 | /// Set the underlay devices used by the xde kernel module 92 | #[derive(Clone, Debug, Serialize, Deserialize)] 93 | pub struct SetXdeUnderlayReq { 94 | pub u1: String, 95 | pub u2: String, 96 | } 97 | -------------------------------------------------------------------------------- /xde/src/dls/sys.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. 
If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | // stuff we need from dls 8 | 9 | use crate::mac; 10 | use crate::mac::mac_client_handle; 11 | use crate::mac::mac_tx_cookie_t; 12 | use illumos_sys_hdrs::boolean_t; 13 | use illumos_sys_hdrs::c_char; 14 | use illumos_sys_hdrs::c_int; 15 | use illumos_sys_hdrs::datalink_id_t; 16 | use illumos_sys_hdrs::mblk_t; 17 | use illumos_sys_hdrs::uintptr_t; 18 | use illumos_sys_hdrs::zoneid_t; 19 | 20 | unsafe extern "C" { 21 | pub fn dls_devnet_create( 22 | mh: *mut mac::mac_handle, 23 | linkid: datalink_id_t, 24 | zoneid: zoneid_t, 25 | ) -> c_int; 26 | 27 | pub fn dls_devnet_destroy( 28 | mh: *mut mac::mac_handle, 29 | linkid: *mut datalink_id_t, 30 | wait: boolean_t, 31 | ) -> c_int; 32 | 33 | pub fn dls_mgmt_get_linkid( 34 | name: *const c_char, 35 | linkid: *mut datalink_id_t, 36 | ) -> c_int; 37 | } 38 | 39 | // Private DLS functions needed to have a Tx path on top of 40 | // an existing link while circumventing `ip`. 41 | unsafe extern "C" { 42 | pub type dls_devnet_s; 43 | pub type dld_str_s; 44 | pub type dls_link; 45 | 46 | /// Transmit a packet chain on a given link. 47 | /// This is effectively one layer above mac_tx. 48 | pub fn str_mdata_fastpath_put( 49 | dsp: *mut dld_str_s, 50 | mp: *mut mblk_t, 51 | f_hint: uintptr_t, 52 | flag: u16, 53 | ) -> mac_tx_cookie_t; 54 | 55 | // NOTE: ALL BELOW FUNCTIONS REQUIRE THE MAC PERIMETER TO BE HELD. 
56 | pub fn dls_devnet_hold( 57 | link: datalink_id_t, 58 | ddhp: *mut dls_dl_handle, 59 | ) -> c_int; 60 | 61 | pub fn dls_devnet_rele(dlh: dls_dl_handle); 62 | 63 | pub fn dls_link_hold( 64 | name: *const c_char, 65 | dlpp: *mut *mut dls_link, 66 | ) -> c_int; 67 | 68 | pub fn dls_link_rele(dlp: *mut dls_link); 69 | 70 | pub fn dls_devnet_mac(dlh: dls_dl_handle) -> *const c_char; 71 | 72 | pub fn dls_open( 73 | dlp: *mut dls_link, 74 | ddh: dls_dl_handle, 75 | dsp: *mut dld_str_s, 76 | ) -> c_int; 77 | 78 | pub fn dls_close(dsp: *mut dld_str_s); 79 | 80 | // These are stlouis-only methods used to enable the 81 | // approach we're using here to get a Tx pathway via the 82 | // existing primary MAC client on the underlay devices. 83 | pub fn dld_str_create_detached() -> *mut dld_str_s; 84 | pub fn dld_str_destroy_detached(val: *mut dld_str_s); 85 | pub fn dld_str_mac_client_handle( 86 | val: *mut dld_str_s, 87 | ) -> *mut mac_client_handle; 88 | } 89 | 90 | pub type dls_dl_handle = *mut dls_devnet_s; 91 | -------------------------------------------------------------------------------- /README.adoc: -------------------------------------------------------------------------------- 1 | = Oxide Packet Transformation Engine 2 | 3 | == Installation 4 | On helios systems, OPTE can be built and installed using the `cargo xtask install` command. 5 | 6 | For ease of development, the above command will bypass `pkg`. OPTE can instead be installed from a new IPS package using `cargo xtask install --from-package`, which may require the `--force-package-unfreeze` flag if OPTE has been installed as a prerequisite for omicron. 7 | 8 | == Contributing 9 | 10 | Please see the xref:CONTRIBUTING.adoc[CONTRIBUTING] doc if you are 11 | interested in contributing to the project. 12 | 13 | == Tests and performance 14 | 15 | While the XDE kernel module runs only on Helios, our test suite and microbenchmarks work on Unix-like development systems. 
16 | 17 | [%header,cols="1,1"] 18 | |=== 19 | | Helios 20 | | Linux/Mac/... 21 | 22 | | `cargo nextest run` 23 | | `cargo nextest run -p opte -p opte-api -p oxide-vpc` 24 | 25 | | `cargo build -p xde-tests --tests`, `pfexec target/debug/deps/loopback-` 26 | | N/A 27 | 28 | | `cargo ubench` 29 | | `cargo ubench` 30 | 31 | | `cargo kbench` 32 | | N/A 33 | 34 | | N/A 35 | | `cargo +nightly fuzz run parse-in`, `cargo +nightly fuzz run parse-out` 36 | |=== 37 | 38 | More detail on our benchmarks can be found in xref:bench/README.adoc[bench/README]. 39 | 40 | == Relevant RFDs and Inspiration 41 | 42 | * https://rfd.shared.oxide.computer/rfd/0009[RFD 9: Networking Considerations] 43 | * https://rfd.shared.oxide.computer/rfd/0021[RFD 21: User Networking API] 44 | * https://rfd.shared.oxide.computer/rfd/0063[RFD 63: Network Architecture] 45 | * https://rfd.shared.oxide.computer/rfd/488[RFD 488: Multicast] 46 | * https://www.microsoft.com/en-us/research/wp-content/uploads/2017/03/vfp-nsdi-2017-final.pdf[Microsoft's VFP] 47 | 48 | == Directory Index 49 | 50 | dtrace:: A collection of DTrace scripts for common tracing and 51 | diagnostic processes useful for understanding the runtime behavior of 52 | OPTE. 53 | 54 | illumos-sys-hdrs:: A raw interface to the types and function 55 | prototypes in the illumos `uts/common/sys` headers. 56 | 57 | opte:: The implementation of the generic bits of OPTE. This acts as 58 | both a library and framework which is used as the basis for 59 | implementing some type of network. For example, the oxide-vpc crate 60 | uses the opte crate to implement the Oxide VPC network. 61 | 62 | opte-ioctl:: The userland library used for interacting with OPTE. 63 | 64 | opteadm:: The command line utility used to configure and inspect OPTE. 65 | This is meant to be used by an operator, not by a client program. 66 | 67 | oxide-vpc:: An implementation of the Oxide VPC network. 68 | 69 | pkg:: A script for generating an IPS package for the `xde` driver. 
70 | 71 | xde:: A kernel module for running OPTE in the kernel. This module 72 | allows OPTE to interface with the system and for clients to interface 73 | with OPTE. It currently focuses purely on the Oxide VPC use case. 74 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "bench", 4 | "bin/*", 5 | "crates/*", 6 | "fuzz", 7 | "lib/*", 8 | "xde", 9 | "xde/xde-link", 10 | "xde-tests", 11 | "xtask", 12 | ] 13 | default-members = [ 14 | "bin/*", 15 | "crates/*", 16 | "lib/*", 17 | ] 18 | resolver = "2" 19 | 20 | [workspace.package] 21 | edition = "2024" 22 | license = "MPL-2.0" 23 | repository = "https://github.com/oxidecomputer/opte" 24 | 25 | [workspace.dependencies] 26 | # Internal crates 27 | derror-macro = { path = "crates/derror-macro" } 28 | illumos-sys-hdrs = { path = "crates/illumos-sys-hdrs" } 29 | kstat-macro = { path = "crates/kstat-macro" } 30 | opte-api = { path = "crates/opte-api", default-features = false } 31 | 32 | # Shared internal testing resources 33 | opte-test-utils = { path = "lib/opte-test-utils" } 34 | 35 | # Public library crates 36 | opte = { path = "lib/opte", default-features = false } 37 | opte-ioctl = { path = "lib/opte-ioctl" } 38 | opteadm = { path = "bin/opteadm" } 39 | oxide-vpc = { path = "lib/oxide-vpc", default-features = false } 40 | 41 | # External dependencies 42 | anyhow = "1.0" 43 | bitflags = "2" 44 | cargo_metadata = "0.23" 45 | cfg-if = "1" 46 | clap = { version = "4", features = ["derive", "string", "wrap_help"] } 47 | crc32fast = { version = "1", default-features = false } 48 | criterion = "0.8" 49 | ctor = "0.6" 50 | darling = "0.21" 51 | dyn-clone = "1.0" 52 | heapless = "0.8" 53 | ingot = "0.1.1" 54 | ipnetwork = { version = "0.21", default-features = false } 55 | itertools = { version = "0.14", default-features = false } 56 | libc = "0.2" 57 | libnet = { 
git = "https://github.com/oxidecomputer/netadm-sys", branch = "main" } 58 | nix = { version = "0.30", features = ["signal", "user"] } 59 | pcap-parser = "0.17" 60 | postcard = { version = "1", features = ["alloc"], default-features = false } 61 | proc-macro2 = { version = "1" } 62 | quote = "1.0" 63 | rand = "0.9.2" 64 | ref-cast = "1.0.25" 65 | ron = "0.12" 66 | serde = { version = "1.0", features = ["alloc", "derive"], default-features = false } 67 | serde_json = { version = "1.0" } 68 | slog = { version = "2.7", features = ["max_level_trace", "release_max_level_trace"] } 69 | slog-async = "2.8" 70 | slog-envlogger = "2.2" 71 | slog-term = "2.9" 72 | smoltcp = { version = "0.11", default-features = false } 73 | syn = "2" 74 | tabwriter = { version = "1", features = ["ansi_formatting"] } 75 | thiserror = "2.0" 76 | toml = "0.9" 77 | uuid = { version = "1.0", default-features = false, features = ["serde"]} 78 | usdt = "0.6" 79 | version_check = "0.9" 80 | zerocopy = { version = "0.8", features = ["derive"] } 81 | zone = { git = "https://github.com/oxidecomputer/zone" } 82 | ztest = { git = "https://github.com/oxidecomputer/falcon", branch = "main" } 83 | poptrie = { git = "https://github.com/oxidecomputer/poptrie", branch = "multipath" } 84 | 85 | [profile.dev] 86 | opt-level = 1 87 | 88 | [profile.release] 89 | debug = 2 90 | lto = "thin" 91 | 92 | [profile.release-lto] 93 | inherits = "release" 94 | lto = true 95 | codegen-units = 1 96 | 97 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "test" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: output_rules = [ 7 | #: "/work/*.log", 8 | #: ] 9 | #: 10 | #: [dependencies.xde] 11 | #: job = "opte-xde" 12 | #: 13 | 14 | #### >>>>>>>>>>>>>>>>>>>>>>>>>>>> Local Usage >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 15 | #### 16 | #### If 
you are running this locally, you must run the xde.sh job first to have 17 | #### the artifacts at the expected spot. 18 | #### 19 | #### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 20 | 21 | set -o xtrace 22 | 23 | pfexec pkg install brand/omicron1 brand/omicron1/tools opte 24 | 25 | if [[ -z $BUILDOMAT_JOB_ID ]]; then 26 | echo Note: if you are running this locally, you must run the xde.sh job first 27 | echo to have the artifacts at the expected spot. 28 | pfexec mkdir -p /input/xde 29 | pfexec ln -s /work /input/xde/work 30 | fi 31 | 32 | pfexec /usr/lib/brand/omicron1/baseline -w /var/run/brand/omicron1/baseline 33 | 34 | function cleanup { 35 | pfexec chown -R `id -un`:`id -gn` . 36 | if [[ -z $BUILDOMAT_JOB_ID ]]; then 37 | pfexec rm -rf /input/xde 38 | fi 39 | } 40 | trap cleanup EXIT 41 | 42 | function get_artifact { 43 | repo=$1 44 | series=$2 45 | commit=$3 46 | name=$4 47 | url=https://buildomat.eng.oxide.computer/public/file/oxidecomputer 48 | 49 | mkdir -p download 50 | pushd download 51 | if [[ ! 
-f $name ]]; then 52 | curl -fOL $url/$repo/$series/$commit/$name 53 | fi 54 | popd 55 | } 56 | 57 | banner "collect" 58 | get_artifact softnpu image 88f5f1334364e5580fe778c44ac0746a35927351 softnpu 59 | get_artifact sidecar-lite release 3fff53ae549ab1348b680845693e66b224bb5d2f libsidecar_lite.so 60 | get_artifact sidecar-lite release 3fff53ae549ab1348b680845693e66b224bb5d2f scadm 61 | 62 | if [[ $DOWNLOAD_ONLY -eq 1 ]]; then 63 | exit 0; 64 | fi 65 | 66 | uname -a 67 | cat /etc/versions/build 68 | 69 | dladm 70 | ipadm 71 | 72 | pfexec rem_drv xde 73 | 74 | set -o errexit 75 | set -o pipefail 76 | 77 | banner "prepare" 78 | pfexec cp /input/xde/work/xde.conf /kernel/drv/ 79 | pfexec cp /input/xde/work/release/xde /kernel/drv/amd64 80 | pfexec add_drv xde 81 | 82 | banner "test" 83 | pfexec chmod +x /input/xde/work/test/loopback 84 | pfexec /input/xde/work/test/loopback --nocapture 85 | 86 | # Multicast tests must run with --test-threads=1 because they share 87 | # hardcoded device names (xde_test_sim0/1, xde_test_vnic0/1) that conflict 88 | # when tests run in parallel 89 | pfexec chmod +x /input/xde/work/test/multicast_rx 90 | pfexec /input/xde/work/test/multicast_rx --nocapture --test-threads=1 91 | 92 | pfexec chmod +x /input/xde/work/test/multicast_multi_sub 93 | pfexec /input/xde/work/test/multicast_multi_sub --nocapture --test-threads=1 94 | 95 | pfexec chmod +x /input/xde/work/test/multicast_validation 96 | pfexec /input/xde/work/test/multicast_validation --nocapture --test-threads=1 97 | 98 | banner "teardown" 99 | # Ensure full driver teardown is exercised after tests complete 100 | pfexec rem_drv xde 101 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "bench" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: rust_toolchain = true 7 | #: output_rules = [ 8 | #: 
"=/work/bench-results.tgz", 9 | #: ] 10 | #: 11 | #: [[publish]] 12 | #: series = "benchmark" 13 | #: name = "bench-results.tgz" 14 | #: from_output = "/work/bench-results.tgz" 15 | #: 16 | #: [dependencies.xde] 17 | #: job = "opte-xde" 18 | #: 19 | 20 | #### >>>>>>>>>>>>>>>>>>>>>>>>>>>> Local Usage >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 21 | #### 22 | #### If you are running this locally, you must run the xde.sh job first to have 23 | #### the artifacts at the expected spot. 24 | #### 25 | #### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 26 | 27 | set -o xtrace 28 | 29 | source .github/buildomat/common.sh 30 | 31 | pfexec pkg install brand/omicron1 brand/omicron1/tools opte iperf demangle flamegraph 32 | 33 | if [[ -z $BUILDOMAT_JOB_ID ]]; then 34 | echo Note: if you are running this locally, you must run the xde.sh job first 35 | echo to have the artifacts at the expected spot. 36 | pfexec mkdir -p /input/xde 37 | pfexec ln -s /work /input/xde/work 38 | fi 39 | 40 | pfexec /usr/lib/brand/omicron1/baseline -w /var/run/brand/omicron1/baseline 41 | 42 | function cleanup { 43 | pfexec chown -R `id -un`:`id -gn` . 44 | if [[ -z $BUILDOMAT_JOB_ID ]]; then 45 | pfexec rm -rf /input/xde 46 | fi 47 | } 48 | trap cleanup EXIT 49 | 50 | function get_artifact { 51 | local curl_res 52 | 53 | repo=$1 54 | series=$2 55 | commit=$3 56 | name=$4 57 | url=https://buildomat.eng.oxide.computer/public/file/oxidecomputer 58 | 59 | mkdir -p download 60 | pushd download 61 | if [[ ! -f $name ]]; then 62 | curl -fOL $url/$repo/$series/$commit/$name 63 | curl_res=$? 64 | fi 65 | popd 66 | 67 | return $curl_res 68 | } 69 | 70 | OUT_DIR=/work/bench-results 71 | 72 | mkdir -p $OUT_DIR 73 | mkdir -p target/criterion 74 | mkdir -p target/xde-bench 75 | 76 | banner "collect" 77 | 78 | # If we're on a PR, compare against master. 79 | # If we're on master, compare against our parent. 
80 | BASELINE_COMMIT=`cat .git/refs/heads/master` 81 | if [[ $GITHUB_BRANCH == "master" ]]; then 82 | BASELINE_COMMIT=`git log --pretty=%P -n 1 "$GITHUB_BRANCH"` 83 | fi 84 | 85 | if get_artifact opte benchmark $BASELINE_COMMIT bench-results.tgz; then 86 | # Illumos tar seems to lack --strip/--strip-components. 87 | tar -xf download/bench-results.tgz -C target 88 | mv target/bench-results/* target/ 89 | rm -r target/bench-results 90 | else 91 | echo "Baseline results not found for branch 'master'. Running without comparison." 92 | fi 93 | 94 | if [[ $DOWNLOAD_ONLY -eq 1 ]]; then 95 | exit 0; 96 | fi 97 | 98 | uname -a 99 | cat /etc/versions/build 100 | 101 | dladm 102 | ipadm 103 | 104 | pfexec rem_drv xde 105 | 106 | set -o errexit 107 | set -o pipefail 108 | 109 | banner "prepare" 110 | pfexec cp /input/xde/work/xde.conf /kernel/drv/ 111 | pfexec cp /input/xde/work/release/xde /kernel/drv/amd64 112 | pfexec add_drv xde 113 | 114 | banner "bench" 115 | cargo kbench local 116 | cargo ubench 117 | 118 | cp -r target/criterion $OUT_DIR 119 | cp -r target/xde-bench $OUT_DIR 120 | 121 | pushd /work 122 | tar -caf bench-results.tgz bench-results 123 | popd 124 | -------------------------------------------------------------------------------- /dtrace/README.adoc: -------------------------------------------------------------------------------- 1 | = DTrace Scripts 2 | 3 | This directory contains DTrace scripts for monitoring and debugging 4 | various aspects of OPTE's operation. It also contains the `opte-trace` 5 | script which makes it more convenient to run these scripts; otherwise 6 | you have to make sure to pass the correct DTrace flags to import the 7 | local library file and common header. 8 | 9 | Some scripts also include USDT versions. Those can be useful when 10 | running unit/integration tests. 11 | 12 | |=== 13 | |Script |Description 14 | 15 | a|`opte-bad-packet.d` 16 | |Trace "bad" packets as they occur. 
A bad packet is merely one that we 17 | failed to parse, typically because it had an unexpected structure. 18 | This probe may fire quite often given xde's current use of 19 | promiscuous mode; and that is to be expected until we can make use of 20 | mac flows (see 21 | https://github.com/oxidecomputer/opte/issues/61[opte#61]). 22 | 23 | a|`opte-flow-expire.d` 24 | |Track flows as they expire. This includes the Unified Flow Table 25 | (UFT), Layer Flow Tables (LFTs), and the TCP flow table. 26 | 27 | a|`opte-gen-desc-fail.d` 28 | |Track `Action::Stateful::gen_desc()` failures. A failure here 29 | typically indicates failure to acquire a finite resource due to 30 | exhaustion. 31 | 32 | a|`opte-gen-ht-fail.d` 33 | |Track `StaticAction::gen_ht()` failures. 34 | 35 | a|`opte-guest-loopback.d` 36 | |Track packets that travel the "guest loopback" path. This is the path 37 | taken when two VPC guests live on the same sled. 38 | 39 | a|`opte-ht.d` 40 | |Track header transformations as they happen. This only tracks rule 41 | processing, it does not track transformations that take place in the 42 | hot-path (UFT hit). 43 | 44 | a|`opte-ioctl.d` 45 | |Track opte ioctl commands as they happen. 46 | 47 | a|`opte-layer-process.d` 48 | |Track a flow as it is processed by the different layers. This only 49 | applies to packets that do not match a flow in the UFT. 50 | 51 | a|`opte-next-hop.d` 52 | |Track the Oxide Rack Network (the "physical" network to the VPC) next 53 | hop resolution. 54 | 55 | a|`opte-port-process.d` 56 | |Track packet processing results as they happen. Print the port, 57 | direction, epoch, before/after flow ID, the packet length, and the 58 | result of processing. 59 | 60 | a|`opte-rule-match.d` 61 | |Track rule match/no-match as it happens. Print the direction, 62 | layer, and flow ID of the match/no-match, as well as the resulting 63 | `Action` of the match. 64 | 65 | a|`opte-tcp-flow-state.d` 66 | |Track the TCP flow state changes as they happen.
Printing the state 67 | transition as well as the flow ID. 68 | 69 | a|`opte-mcast-delivery.d` 70 | |Track multicast Tx/Rx, local same-sled delivery, underlay forwarding, and 71 | external forwarding. Also tracks multicast control-plane operations (map 72 | set/clear, fwd set/clear, subscribe/unsubscribe, and dumps) to help correlate 73 | config changes with dataplane events. Optional toggles are in the script's 74 | BEGIN block: `flow_debug` (adds xde_mc_tx entry/return), `suppress_output` 75 | (suppress per-event output), and `show_summary` (show aggregations at END). 76 | 77 | a|`opte-uft-invalidate.d` 78 | |Track Unified Flow Table invalidation as it happens. A UFT entry is 79 | invalidated whenever the rule set has been updated causing the port's 80 | epoch to leap ahead of the UFT entry's epoch value. 81 | 82 | a|`protos.d` 83 | |This doesn't track anything, it's just a nice way to share IP 84 | protocol definitions across all scripts. 85 | |=== 86 | -------------------------------------------------------------------------------- /crates/kstat-macro/src/lib.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2022 Oxide Computer Company 6 | 7 | use proc_macro::TokenStream; 8 | use quote::format_ident; 9 | use quote::quote; 10 | use syn::DeriveInput; 11 | use syn::Field; 12 | use syn::FieldsNamed; 13 | use syn::FieldsUnnamed; 14 | use syn::Ident; 15 | use syn::parse_macro_input; 16 | 17 | /// Generate a `KStatProvider` implementation given a struct of named 18 | /// fields of type `KStatU64`. 
19 | /// 20 | /// ```Rust 21 | /// #[derive(KStatProvider)] 22 | /// struct PortStats { 23 | /// in_pkts: KStatU64, 24 | /// out_pkts: KStatU64, 25 | /// } 26 | /// ``` 27 | /// 28 | /// This macro generates the following code based on the struct above. 29 | /// 30 | /// ```Rust 31 | /// impl KStatProvider for PortStats { 32 | /// const NUM_FIELDS: u32 = 2; 33 | /// 34 | /// fn init(&mut self) -> result::Result<(), kstat::Error> { 35 | /// self.in_pkts.init("in_pkts")?; 36 | /// self.out_pkts.init("out_pkts")?; 37 | /// Ok(()) 38 | /// } 39 | /// 40 | /// fn new() -> Self { 41 | /// Self { 42 | /// in_pkts: KStatU64::new(), 43 | /// out_pkts: KStatU64::new(), 44 | /// } 45 | /// } 46 | /// } 47 | /// ``` 48 | #[proc_macro_derive(KStatProvider)] 49 | pub fn derive_kstat_provider(input: TokenStream) -> TokenStream { 50 | let DeriveInput { ident, data, .. } = parse_macro_input!(input); 51 | let fields: Vec = match data { 52 | syn::Data::Struct(s) => match s.fields { 53 | syn::Fields::Named(FieldsNamed { named, .. }) => { 54 | named.into_iter().collect() 55 | } 56 | 57 | syn::Fields::Unnamed(FieldsUnnamed { unnamed: _, .. }) => { 58 | panic!("A KStatProvider cannot have unnamed fields"); 59 | } 60 | 61 | syn::Fields::Unit => { 62 | panic!("A unit struct cannot be a KStatProvider"); 63 | } 64 | }, 65 | 66 | _ => panic!("Only a struct may be a KStatProvider"), 67 | }; 68 | 69 | let num_fields = fields.len() as u32; 70 | let fields_ident: Vec = 71 | fields.iter().map(|f| f.ident.clone().unwrap()).collect(); 72 | let ident_snap = format_ident!("{}Snap", ident); 73 | 74 | let output = quote!
{ 75 | #[derive(Clone, Debug)] 76 | pub struct #ident_snap { 77 | #( pub #fields_ident: u64, )* 78 | } 79 | 80 | impl KStatProvider for #ident { 81 | const NUM_FIELDS: u32 = #num_fields; 82 | type Snap = #ident_snap; 83 | 84 | fn init( 85 | &mut self 86 | ) -> core::result::Result<(), ::opte::ddi::kstat::Error> { 87 | #( self.#fields_ident.init(stringify!(#fields_ident))?; )* 88 | Ok(()) 89 | } 90 | 91 | fn new() -> Self { 92 | use ::opte::ddi::kstat::KStatU64; 93 | 94 | Self { 95 | #( #fields_ident: KStatU64::new(), )* 96 | } 97 | } 98 | 99 | fn snapshot(&self) -> Self::Snap { 100 | #ident_snap { 101 | #( #fields_ident: self.#fields_ident.val(), )* 102 | } 103 | } 104 | } 105 | }; 106 | 107 | output.into() 108 | } 109 | -------------------------------------------------------------------------------- /lib/opte/src/engine/arp.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | //! ARP headers and data. 
8 | 9 | use super::ether::Ethernet; 10 | use crate::ddi::mblk::MsgBlk; 11 | use core::fmt; 12 | use core::fmt::Display; 13 | use ingot::Ingot; 14 | use ingot::ethernet::Ethertype; 15 | use ingot::types::NetworkRepr; 16 | use ingot::types::primitives::u16be; 17 | use opte_api::Ipv4Addr; 18 | use opte_api::MacAddr; 19 | use serde::Deserialize; 20 | use serde::Serialize; 21 | use zerocopy::ByteSlice; 22 | 23 | pub const ARP_HTYPE_ETHERNET: u16 = 1; 24 | 25 | #[derive( 26 | Clone, 27 | Copy, 28 | Debug, 29 | Deserialize, 30 | Eq, 31 | Ord, 32 | PartialEq, 33 | PartialOrd, 34 | Serialize, 35 | Hash, 36 | )] 37 | pub struct ArpOp(u16); 38 | 39 | impl ArpOp { 40 | pub const REQUEST: Self = Self(1); 41 | pub const REPLY: Self = Self(2); 42 | } 43 | 44 | impl Default for ArpOp { 45 | fn default() -> Self { 46 | Self::REQUEST 47 | } 48 | } 49 | 50 | impl Display for ArpOp { 51 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 52 | let s = match *self { 53 | ArpOp::REQUEST => "Request", 54 | ArpOp::REPLY => "Reply", 55 | _ => "Unknown", 56 | }; 57 | write!(f, "{s}") 58 | } 59 | } 60 | 61 | impl NetworkRepr> for ArpOp { 62 | fn to_network(self) -> zerocopy::U16 { 63 | self.0.into() 64 | } 65 | 66 | fn from_network(val: zerocopy::U16) -> Self { 67 | Self(val.into()) 68 | } 69 | } 70 | 71 | /// Generate an ARP reply from SHA/SPA to THA/TPA. 72 | pub fn gen_arp_reply( 73 | sha: MacAddr, 74 | spa: Ipv4Addr, 75 | tha: MacAddr, 76 | tpa: Ipv4Addr, 77 | ) -> MsgBlk { 78 | MsgBlk::new_ethernet_pkt(( 79 | Ethernet { destination: tha, source: sha, ethertype: Ethertype::ARP }, 80 | ArpEthIpv4 { 81 | op: ArpOp::REPLY, 82 | sha, 83 | spa, 84 | tha, 85 | tpa, 86 | ..Default::default() 87 | }, 88 | )) 89 | } 90 | 91 | /// An ARP packet containing Ethernet (MAC) to IPv4 address mappings. 
92 | #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, Ingot)] 93 | #[ingot(impl_default)] 94 | pub struct ArpEthIpv4 { 95 | #[ingot(default = ARP_HTYPE_ETHERNET)] 96 | pub htype: u16be, 97 | #[ingot(default = Ethertype::IPV4, is = "u16be")] 98 | pub ptype: Ethertype, 99 | #[ingot(default = size_of::() as u8)] 100 | pub hlen: u8, 101 | #[ingot(default = size_of::() as u8)] 102 | pub plen: u8, 103 | 104 | #[ingot(is = "u16be")] 105 | pub op: ArpOp, 106 | 107 | #[ingot(is = "[u8; 6]")] 108 | pub sha: MacAddr, 109 | #[ingot(is = "[u8; 4]")] 110 | pub spa: Ipv4Addr, 111 | 112 | #[ingot(is = "[u8; 6]")] 113 | pub tha: MacAddr, 114 | #[ingot(is = "[u8; 4]")] 115 | pub tpa: Ipv4Addr, 116 | } 117 | 118 | impl ValidArpEthIpv4 { 119 | pub fn values_valid(&self) -> bool { 120 | self.htype() == ARP_HTYPE_ETHERNET 121 | && self.ptype() == Ethertype::IPV4 122 | && self.hlen() == (size_of::() as u8) 123 | && self.plen() == (size_of::() as u8) 124 | && (self.op() == ArpOp::REQUEST || self.op() == ArpOp::REPLY) 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /lib/oxide-vpc/src/engine/gateway/transit.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! Utility functions to allow a port to permit traffic on an 8 | //! additional set of CIDR blocks, e.g. to enable transit for 9 | //! VPC-wide VPN traffic. 
10 | 11 | use super::*; 12 | use crate::api::RemoveCidrResp; 13 | use crate::engine::VpcNetwork; 14 | use opte::api::IpCidr; 15 | use opte::api::NoResp; 16 | use opte::engine::port::Port; 17 | use opte::engine::rule::Finalized; 18 | 19 | fn make_holepunch_rule( 20 | guest_mac: MacAddr, 21 | gateway_mac: MacAddr, 22 | dest: IpCidr, 23 | dir: Direction, 24 | vpc_mappings: Arc, 25 | ) -> Rule { 26 | let (cidr_in_pred, cidr_out_pred) = match dest { 27 | IpCidr::Ip4(v4) => ( 28 | Predicate::InnerDstIp4(vec![Ipv4AddrMatch::Prefix(v4)]), 29 | Predicate::InnerSrcIp4(vec![Ipv4AddrMatch::Prefix(v4)]), 30 | ), 31 | IpCidr::Ip6(v6) => ( 32 | Predicate::InnerDstIp6(vec![Ipv6AddrMatch::Prefix(v6)]), 33 | Predicate::InnerSrcIp6(vec![Ipv6AddrMatch::Prefix(v6)]), 34 | ), 35 | }; 36 | 37 | match dir { 38 | Direction::In => { 39 | let mut cidr_in = Rule::new( 40 | 1000, 41 | Action::Static(Arc::new(RewriteSrcMac { gateway_mac })), 42 | ); 43 | cidr_in.add_predicate(cidr_in_pred); 44 | cidr_in.add_predicate(Predicate::InnerEtherDst(vec![ 45 | EtherAddrMatch::Exact(guest_mac), 46 | ])); 47 | 48 | cidr_in.finalize() 49 | } 50 | Direction::Out => { 51 | let vpc_meta = Arc::new(VpcMeta::new(vpc_mappings)); 52 | let mut cidr_out = Rule::new(1000, Action::Meta(vpc_meta)); 53 | cidr_out.add_predicate(Predicate::InnerEtherSrc(vec![ 54 | EtherAddrMatch::Exact(guest_mac), 55 | ])); 56 | cidr_out.add_predicate(cidr_out_pred); 57 | 58 | cidr_out.finalize() 59 | } 60 | } 61 | } 62 | 63 | /// Allows a guest to send or receive traffic on a CIDR block 64 | /// other than their private IP. 
65 | pub fn allow_cidr( 66 | port: &Port, 67 | dest: IpCidr, 68 | dir: Direction, 69 | vpc_mappings: Arc, 70 | ) -> Result { 71 | let rule = make_holepunch_rule( 72 | port.mac_addr(), 73 | port.network().cfg.gateway_mac, 74 | dest, 75 | dir, 76 | vpc_mappings, 77 | ); 78 | port.add_rule(NAME, dir, rule)?; 79 | Ok(NoResp::default()) 80 | } 81 | 82 | /// Prevents a guest from sending/receiving traffic on a CIDR block 83 | /// other than their private IP. 84 | pub fn remove_cidr( 85 | port: &Port, 86 | dest: IpCidr, 87 | dir: Direction, 88 | vpc_mappings: Arc, 89 | ) -> Result { 90 | let rule = make_holepunch_rule( 91 | port.mac_addr(), 92 | port.network().cfg.gateway_mac, 93 | dest, 94 | dir, 95 | vpc_mappings, 96 | ); 97 | 98 | let maybe_id = port.find_rule(NAME, dir, &rule)?; 99 | if let Some(id) = maybe_id { 100 | port.remove_rule(NAME, dir, id)?; 101 | } 102 | 103 | Ok(if maybe_id.is_none() { 104 | RemoveCidrResp::NotFound 105 | } else { 106 | RemoveCidrResp::Ok(dest) 107 | }) 108 | } 109 | -------------------------------------------------------------------------------- /bench/src/iperf.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! Utilities for parsing iPerf JSON output. 
8 | 9 | use serde::Deserialize; 10 | use serde::Serialize; 11 | use std::collections::BTreeMap; 12 | use std::net::IpAddr; 13 | 14 | #[derive(Clone, Debug, Deserialize, Serialize)] 15 | pub struct Output { 16 | pub start: StartSession, 17 | pub intervals: Vec, 18 | pub end: EndSession, 19 | } 20 | 21 | #[derive(Clone, Debug, Deserialize, Serialize)] 22 | pub struct StartSession { 23 | pub connected: Vec, 24 | pub version: String, 25 | pub system_info: String, 26 | pub timestamp: Time, 27 | pub connecting_to: Host, 28 | pub cookie: String, 29 | pub tcp_mss_default: Option, 30 | pub target_bitrate: Option, 31 | pub fq_rate: Option, 32 | pub sock_bufsize: Option, 33 | pub sndbuf_actual: Option, 34 | pub rcvbuf_actual: Option, 35 | pub test_start: TestStart, 36 | } 37 | 38 | #[derive(Clone, Debug, Deserialize, Serialize)] 39 | pub struct Session { 40 | pub socket: u64, 41 | pub local_host: IpAddr, 42 | pub local_port: u16, 43 | pub remote_host: IpAddr, 44 | pub remote_port: u16, 45 | } 46 | 47 | #[derive(Clone, Debug, Deserialize, Serialize)] 48 | pub struct Host { 49 | pub host: IpAddr, 50 | pub port: u16, 51 | } 52 | 53 | #[derive(Clone, Debug, Deserialize, Serialize)] 54 | pub struct Time { 55 | pub time: String, 56 | pub timesecs: u64, 57 | } 58 | 59 | #[derive(Clone, Debug, Deserialize, Serialize)] 60 | pub struct TestStart { 61 | pub protocol: Protocol, 62 | pub num_streams: u64, 63 | pub blksize: u64, 64 | pub omit: u64, 65 | pub duration: u64, 66 | pub bytes: u64, 67 | pub blocks: u64, 68 | pub reverse: u64, 69 | pub tos: Option, 70 | pub target_bitrate: Option, 71 | pub bidir: Option, 72 | pub fqrate: Option, 73 | } 74 | 75 | #[derive(Clone, Debug, Deserialize, Serialize)] 76 | #[serde(rename_all = "UPPERCASE")] 77 | pub enum Protocol { 78 | Tcp, 79 | Udp, 80 | Sctp, 81 | } 82 | 83 | #[derive(Clone, Debug, Deserialize, Serialize)] 84 | pub struct Interval { 85 | pub streams: Vec, 86 | pub sum: Stat, 87 | } 88 | 89 | #[derive(Clone, Debug, Deserialize, 
Serialize)] 90 | pub struct StreamStat { 91 | pub socket: u64, 92 | #[serde(flatten)] 93 | pub stat: Stat, 94 | } 95 | 96 | #[derive(Clone, Debug, Deserialize, Serialize)] 97 | pub struct Stat { 98 | pub start: f64, 99 | pub end: f64, 100 | pub seconds: f64, 101 | pub bytes: u64, 102 | pub bits_per_second: f64, 103 | pub jitter_ms: Option, 104 | pub lost_packets: Option, 105 | pub packets: Option, 106 | pub lost_percent: Option, 107 | #[serde(default)] 108 | pub omitted: bool, 109 | pub sender: Option, 110 | } 111 | 112 | #[derive(Clone, Debug, Deserialize, Serialize)] 113 | pub struct CpuStat { 114 | pub host_total: f64, 115 | pub host_user: f64, 116 | pub host_system: f64, 117 | pub remote_total: f64, 118 | pub remote_user: f64, 119 | pub remote_system: f64, 120 | } 121 | 122 | #[derive(Clone, Debug, Deserialize, Serialize)] 123 | pub struct EndSession { 124 | pub streams: Vec>, 125 | pub sum_sent: Stat, 126 | pub sum_received: Stat, 127 | pub cpu_utilization_percent: CpuStat, 128 | } 129 | 130 | #[cfg(test)] 131 | mod test { 132 | use super::*; 133 | 134 | #[test] 135 | fn iperf_output_parse() { 136 | let _val: Output = 137 | serde_json::from_str(include_str!("test/mac-iperf-sender.json")) 138 | .unwrap(); 139 | 140 | let _val2: Output = 141 | serde_json::from_str(include_str!("test/mac-iperf-receiver.json")) 142 | .unwrap(); 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /lib/oxide-vpc/src/engine/gateway/dhcp.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! The DHCP implementation of the Virtual Gateway. 
8 | 9 | use crate::cfg::Ipv4Cfg; 10 | use crate::cfg::VpcCfg; 11 | use alloc::sync::Arc; 12 | use opte::api::DhcpCfg; 13 | use opte::api::DhcpReplyType; 14 | use opte::api::Direction; 15 | use opte::api::Ipv4Addr; 16 | use opte::api::Ipv4PrefixLen; 17 | use opte::api::OpteError; 18 | use opte::api::SubnetRouterPair; 19 | use opte::engine::dhcp::DhcpAction; 20 | use opte::engine::ip::v4::Ipv4Cidr; 21 | use opte::engine::layer::Layer; 22 | use opte::engine::rule::Action; 23 | use opte::engine::rule::Rule; 24 | 25 | pub fn setup( 26 | layer: &mut Layer, 27 | cfg: &VpcCfg, 28 | ip_cfg: &Ipv4Cfg, 29 | dhcp_cfg: DhcpCfg, 30 | ) -> Result<(), OpteError> { 31 | // All guest interfaces live on a `/32`-network in the Oxide VPC; 32 | // restricting the L2 domain to two nodes: the guest NIC and the 33 | // OPTE Port. This allows OPTE to act as the gateway that all 34 | // guest traffic must cross, no matter the destination. In order 35 | // to achieve this we use something called a "local subnet route". 36 | // This is a router entry that maps a "local subnet" to the router 37 | // `0.0.0.0`. If you read RFC 3442, you'll see the original 38 | // intention for this type of route is to allow different subnets 39 | // on the same link (L2 segment) to communicate with each other. 40 | // In our case we place the guest in a network of 1, meaning the 41 | // router itself must be on a different subnet. However, since the 42 | // router, in this case OPTE, is on the same link, we can use the local 43 | // subnet route feature to deliver packets to the router. 44 | // 45 | // * `re1`: The local subnet router entry; mapping the gateway 46 | // subnet to `0.0.0.0`. 47 | // 48 | // * `re2`: The default router entry; mapping all packets to the 49 | // gateway. 50 | // 51 | // You might wonder why the `re2` entry is needed when we have the 52 | // `Router Option (code 3)`.
RFC 3442 specifies the following: 53 | // 54 | // > If the DHCP server returns both a Classless Static Routes 55 | // > option and a Router option, the DHCP client MUST ignore the 56 | // > Router option. 57 | // 58 | // Furthermore, RFC 3442 goes on to say that a DHCP server 59 | // administrator should always set both to be on the safe side. 60 | let gw_cidr = Ipv4Cidr::new(ip_cfg.gateway_ip, Ipv4PrefixLen::NETMASK_ALL); 61 | let re1 = SubnetRouterPair::new(gw_cidr, Ipv4Addr::ANY_ADDR); 62 | let re2 = SubnetRouterPair::new( 63 | Ipv4Cidr::new(Ipv4Addr::ANY_ADDR, Ipv4PrefixLen::NETMASK_NONE), 64 | ip_cfg.gateway_ip, 65 | ); 66 | 67 | let offer = Action::Hairpin(Arc::new(DhcpAction { 68 | client_mac: cfg.guest_mac, 69 | client_ip: ip_cfg.private_ip, 70 | subnet_prefix_len: Ipv4PrefixLen::NETMASK_ALL, 71 | gw_mac: cfg.gateway_mac, 72 | gw_ip: ip_cfg.gateway_ip, 73 | reply_type: DhcpReplyType::Offer, 74 | re1, 75 | re2: Some(re2), 76 | re3: None, 77 | dhcp_cfg: dhcp_cfg.clone(), 78 | })); 79 | 80 | let ack = Action::Hairpin(Arc::new(DhcpAction { 81 | client_mac: cfg.guest_mac, 82 | client_ip: ip_cfg.private_ip, 83 | subnet_prefix_len: Ipv4PrefixLen::NETMASK_ALL, 84 | gw_mac: cfg.gateway_mac, 85 | gw_ip: ip_cfg.gateway_ip, 86 | reply_type: DhcpReplyType::Ack, 87 | re1, 88 | re2: Some(re2), 89 | re3: None, 90 | dhcp_cfg, 91 | })); 92 | 93 | let discover_rule = Rule::new(1, offer); 94 | layer.add_rule(Direction::Out, discover_rule.finalize()); 95 | 96 | let request_rule = Rule::new(1, ack); 97 | layer.add_rule(Direction::Out, request_rule.finalize()); 98 | Ok(()) 99 | } 100 | -------------------------------------------------------------------------------- /lib/oxide-vpc/tests/fuzz_regression.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. 
If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! Fuzz regression tests. 8 | //! 9 | //! These tests capture past known-bad packets which have made some part 10 | //! of OPTE panic in the past, and ensure that it does not today. 11 | 12 | use opte::ddi::mblk::MsgBlk; 13 | use opte::engine::packet::Packet; 14 | use oxide_vpc::engine::VpcParser; 15 | use serde::Deserialize; 16 | use serde::Serialize; 17 | use std::collections::HashMap; 18 | use std::ffi::OsStr; 19 | use std::path::Path; 20 | 21 | #[derive(Debug, Clone, Deserialize, Serialize)] 22 | struct Case { 23 | description: String, 24 | packet: String, 25 | } 26 | 27 | #[derive(Debug, Clone, Hash, Eq, PartialEq)] 28 | struct Label { 29 | family: String, 30 | name: String, 31 | } 32 | 33 | fn run_tests( 34 | root_dir: &str, 35 | test_fn: impl Fn(&[u8]) + std::panic::RefUnwindSafe, 36 | ) { 37 | let base_resource_path = 38 | Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/resources"); 39 | 40 | // Find all test descriptions in tests/resources/$root_dir. 
41 | let mut tests: HashMap = HashMap::new(); 42 | let my_test_dir = base_resource_path.join(root_dir); 43 | for entry in std::fs::read_dir(my_test_dir) 44 | .unwrap_or_else(|e| panic!("failed to find directory {root_dir}: {e}")) 45 | { 46 | let entry = entry.unwrap_or_else(|e| { 47 | panic!("failed to enumerate child of {root_dir}: {e}") 48 | }); 49 | 50 | let path_owned = entry.path(); 51 | let path = path_owned.as_path(); 52 | if path.extension() != Some("ron".as_ref()) { 53 | continue; 54 | } 55 | 56 | let contents = std::fs::read_to_string(path).unwrap_or_else(|e| { 57 | panic!("failed to read contents of {}: {e}", path.display()) 58 | }); 59 | 60 | let cases: HashMap = ron::from_str(&contents) 61 | .unwrap_or_else(|e| { 62 | panic!("failed to parse {}: {e}", path.display()) 63 | }); 64 | 65 | let family = 66 | path.file_stem().and_then(OsStr::to_str).unwrap_or(""); 67 | 68 | tests.extend( 69 | cases 70 | .into_iter() 71 | .map(|(name, v)| (Label { family: family.into(), name }, v)), 72 | ) 73 | } 74 | 75 | // Run all captured tests. 76 | let mut pkt_path = base_resource_path.join("data"); 77 | for (label, case) in tests { 78 | let Label { family, name } = label; 79 | pkt_path.push(&case.packet); 80 | let data = std::fs::read(&pkt_path).unwrap_or_else(|e| { 81 | panic!( 82 | "{root_dir}, {family}/{name}: could not read data from {}: {e}", 83 | pkt_path.as_path().display(), 84 | ) 85 | }); 86 | pkt_path.pop(); 87 | 88 | if let Err(e) = std::panic::catch_unwind(|| test_fn(&data[..])) { 89 | let case_str; 90 | let case_fmt = if case.description.is_empty() { 91 | "" 92 | } else { 93 | case_str = format!("\n -- {}", case.description); 94 | case_str.as_str() 95 | }; 96 | eprintln!( 97 | "\nFuzz regression failure in: \ 98 | {family}/{name}{case_fmt}\n\n\ 99 | Packet {}:\n\ 100 | {:x?}", 101 | case.packet, 102 | &data[..] 
103 | ); 104 | 105 | std::panic::resume_unwind(e) 106 | } 107 | } 108 | } 109 | 110 | #[test] 111 | fn parse_in_regression() { 112 | run_tests("parse_in", |data| { 113 | let mut msg = MsgBlk::copy(data); 114 | let _ = Packet::parse_inbound(msg.iter_mut(), VpcParser {}); 115 | }); 116 | } 117 | 118 | #[test] 119 | fn parse_out_regression() { 120 | run_tests("parse_out", |data| { 121 | let mut msg = MsgBlk::copy(data); 122 | let _ = Packet::parse_outbound(msg.iter_mut(), VpcParser {}); 123 | }); 124 | } 125 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/xde.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "opte-xde" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: rust_toolchain = true 7 | #: output_rules = [ 8 | #: "=/work/debug/xde.dbg", 9 | #: "=/work/debug/xde.dbg.sha256", 10 | #: "=/work/debug/xde_link.dbg.so", 11 | #: "=/work/debug/xde_link.dbg.so.sha256", 12 | #: "=/work/release/xde", 13 | #: "=/work/release/xde.sha256", 14 | #: "=/work/release/xde_link.so", 15 | #: "=/work/release/xde_link.so.sha256", 16 | #: "=/work/test/loopback", 17 | #: "=/work/test/multicast_rx", 18 | #: "=/work/test/multicast_multi_sub", 19 | #: "=/work/test/multicast_validation", 20 | #: "=/work/xde.conf", 21 | #: ] 22 | #: 23 | #: [[publish]] 24 | #: series = "module" 25 | #: name = "xde" 26 | #: from_output = "/work/release/xde" 27 | #: 28 | #: [[publish]] 29 | #: series = "module" 30 | #: name = "xde.sha256" 31 | #: from_output = "/work/release/xde.sha256" 32 | 33 | set -o errexit 34 | set -o pipefail 35 | set -o xtrace 36 | 37 | source .github/buildomat/common.sh 38 | 39 | # 40 | # TGT_BASE allows one to run this more easily in their local 41 | # environment: 42 | # 43 | # TGT_BASE=/var/tmp ./xde.sh 44 | # 45 | TGT_BASE=${TGT_BASE:=/work} 46 | 47 | DBG_SRC=target/x86_64-unknown-unknown/debug 48 | 
DBG_LINK_SRC=target/i686-unknown-illumos/debug 49 | DBG_TGT=$TGT_BASE/debug 50 | 51 | REL_SRC=target/x86_64-unknown-unknown/release-lto 52 | REL_LINK_SRC=target/i686-unknown-illumos/release 53 | REL_TGT=$TGT_BASE/release 54 | 55 | mkdir -p $DBG_TGT $REL_TGT 56 | 57 | cargo --version 58 | rustc --version 59 | 60 | install_pkg jq 61 | 62 | pushd xde 63 | 64 | cp xde.conf /work/xde.conf 65 | 66 | header "check style" 67 | ptime -m cargo +$NIGHTLY fmt -p xde -p xde-link -- --check 68 | 69 | header "analyze" 70 | ptime -m cargo clippy -- \ 71 | --allow clippy::uninlined-format-args --allow clippy::bad_bit_mask 72 | 73 | pushd xde-link 74 | ptime -m cargo clippy -- \ 75 | --allow clippy::uninlined-format-args --allow clippy::bad_bit_mask 76 | popd 77 | 78 | popd 79 | 80 | header "build xde (debug)" 81 | ptime -m cargo xtask build --profile debug xde xde-link 82 | 83 | header "build xde (release)" 84 | ptime -m cargo xtask build --profile release xde xde-link 85 | 86 | # 87 | # Inspect the kernel module for bad relocations in case the old 88 | # codegen issue ever shows its face again. 
#
if elfdump $DBG_SRC/xde.dbg | grep GOTPCREL; then
    echo "found GOTPCREL relocation in debug build"
    exit 1
fi

if elfdump $REL_SRC/xde | grep GOTPCREL; then
    echo "found GOTPCREL relocation in release build"
    exit 1
fi

#
# Stage the debug and release artifacts, each alongside its SHA-256 digest.
#
cp $DBG_SRC/xde.dbg $DBG_TGT/
sha256sum $DBG_TGT/xde.dbg > $DBG_TGT/xde.dbg.sha256

cp $DBG_LINK_SRC/libxde_link.so $DBG_TGT/xde_link.dbg.so
sha256sum $DBG_TGT/xde_link.dbg.so > $DBG_TGT/xde_link.dbg.so.sha256

cp $REL_SRC/xde $REL_TGT/
sha256sum $REL_TGT/xde > $REL_TGT/xde.sha256

cp $REL_LINK_SRC/libxde_link.so $REL_TGT/xde_link.so
sha256sum $REL_TGT/xde_link.so > $REL_TGT/xde_link.so.sha256


header "build xde integration tests"
pushd xde-tests
cargo +$NIGHTLY fmt -- --check
cargo clippy --all-targets

#
# Stage the test binaries under $TGT_BASE (rather than a hard-coded
# /work) so that a local run with TGT_BASE overridden keeps all of its
# outputs together. With TGT_BASE unset this is still /work/test.
#
TEST_TGT=$TGT_BASE/test
mkdir -p $TEST_TGT

for test_name in loopback multicast_rx multicast_multi_sub multicast_validation; do
    # Build once with normal output so compile errors are readable in
    # the log, then ask cargo (quietly, as JSON) where the test
    # executable was written.
    cargo build --test $test_name
    test_bin=$(
        cargo build -q --test $test_name --message-format=json |\
        jq -r "select(.profile.test == true) | .filenames[]"
    )
    cp "$test_bin" $TEST_TGT/$test_name
done
-------------------------------------------------------------------------------- /xde/README.md: -------------------------------------------------------------------------------- 1 | XDE 2 | === 3 | 4 | This is an OPTE driver that is also a MAC provider. This driver was developed as 5 | a parallel crate to opte-drv but the idea is that this work will merge back into 6 | opte-drv. The name is just something I chose to not conflict with the existing 7 | opte driver. 8 | 9 | ## Requirements 10 | 11 | - You'll need 12 | [this branch](https://github.com/oxidecomputer/illumos-gate/tree/xde) 13 | of illumos. **This is not just for the kernel, but also for `dladm` and the 14 | libraries `dladm` links to.** 15 | 16 | - The driver currently assumes there are two underlay network devices it can use 17 | for off-node I/O. This is easily achieved in a VM with viona devices. 18 | 19 | ## Building 20 | 21 | The following builds the `xde` loadable kernel module and places it at 22 | `target/x86_64-unknown-unknown/release-lto/` – this command should be run from the workspace root. 23 | 24 | ``` 25 | cargo xtask build xde 26 | ``` 27 | 28 | The file src/ip.rs is autogenerated, to generate it use `ip-bindgen.sh`. 29 | 30 | ## Installation 31 | 32 | OPTE can be installed using the following command from the workspace root: 33 | 34 | ``` 35 | cargo xtask install 36 | ``` 37 | 38 | The xtask automates building xde and required links (if not done already), as well as installation of the driver using [`add_drv(8)`](https://illumos.org/man/8/add_drv). 39 | 40 | A prebuilt module file can alternately be copied to `/kernel/drv/amd64/` on the platform you're testing on _after the first installation_. 41 | If doing so, you will need to remove ([`rem_drv(8)`](https://illumos.org/man/8/rem_drv)) and re-add ([`add_drv(8)`](https://illumos.org/man/8/add_drv)) the module: 42 | 43 | ```bash 44 | rem_drv xde 45 | add_drv xde 46 | ``` 47 | 48 | ## Usage 49 | 50 | Now we can instantiate an xde device. 
51 | 52 | **NOTE: there is currently a bug in the driver that makes it so it cannot find 53 | underlay devices unless `dladm` has been run at least once. So before 54 | instantiating an xde device run `dladm`.** 55 | 56 | The following creates an xde device in pass-through mode, meaning packets are not 57 | actually processed by opte-core, they just pass through the xde device to one of 58 | the two underlay devices. Remove the `--passthrough` flag to enable opte 59 | processing. The `vioif0` and `vioif1` arguments are the underlay devices this 60 | xde instance will use for off-node I/O. The first mac/IP combo is the overlay 61 | addresses associated with this xde instance. The second mac/IP combo belongs to 62 | the underlay gateway. `fd00:99::1` is the boundary services gateway address. 63 | `99` is the boundary services Geneve VNI. `10` is the VPC VNI. `fd00:1::1` is 64 | the underlay source address for the server this xde is running on. 65 | 66 | ``` 67 | ./opteadm xde-create \ 68 | --passthrough \ 69 | xde0 \ 70 | vioif0 vioif1 \ 71 | A8:40:25:ff:00:01 10.0.0.1 \ 72 | A8:40:25:00:00:01 172.30.0.5 \ 73 | fd00:99::1 99 \ 74 | 10 \ 75 | fd00:1::1 76 | ``` 77 | 78 | For operating without pass-through mode, there are not currently enough ioctls 79 | from opte-drv present in xde to do anything besides drop packets. However, I 80 | have verified that when pass-through is not enabled, the data does make its way 81 | into opte-core where it's determined (correctly due to lack of configuration) 82 | that all packets shall be dropped and nothing escapes to the underlay devices. 83 | 84 | For local testing we can layer a vnic atop this xde instance. 85 | 86 | ``` 87 | dladm create-vnic -t -l xde0 vnic0 88 | ``` 89 | 90 | Along with an IP address to match the above xde configuration.
91 | 92 | ``` 93 | ipadm create-addr -t -T static -a 10.0.0.1/24 vnic0/v4 94 | ``` 95 | 96 | You should now see something like this from `dladm` 97 | 98 | ``` 99 | # dladm 100 | LINK CLASS MTU STATE BRIDGE OVER 101 | vioif0 phys 1500 up -- -- 102 | vioif1 phys 1500 up -- -- 103 | xde0 xde 1500 up -- -- 104 | vnic0 vnic 1500 up -- xde0 105 | ``` 106 | 107 | If there is a host connected to this machine on `vioif0` with the address 108 | `10.0.0.2/24` you should now be able to ping that address. 109 | 110 | To tear down this setup 111 | ``` 112 | ipadm delete-addr vnic0/v4 113 | ipadm delete-if vnic0 114 | dladm delete-vnic -t vnic0 115 | ./opteadm xde-delete xde0 116 | ``` 117 | -------------------------------------------------------------------------------- /crates/opte-api/src/mac.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | use alloc::str::FromStr; 8 | use alloc::string::String; 9 | use alloc::vec::Vec; 10 | use core::fmt; 11 | use core::fmt::Debug; 12 | use core::fmt::Display; 13 | use core::ops::Deref; 14 | use ingot::types::NetworkRepr; 15 | use serde::Deserialize; 16 | use serde::Serialize; 17 | 18 | /// A MAC address. 
19 | #[derive( 20 | Clone, 21 | Copy, 22 | Default, 23 | Deserialize, 24 | Eq, 25 | Ord, 26 | PartialEq, 27 | PartialOrd, 28 | Serialize, 29 | Hash, 30 | )] 31 | pub struct MacAddr { 32 | inner: [u8; 6], 33 | } 34 | 35 | impl NetworkRepr<[u8; 6]> for MacAddr { 36 | fn to_network(self) -> [u8; 6] { 37 | self.inner 38 | } 39 | 40 | fn from_network(val: [u8; 6]) -> Self { 41 | Self { inner: val } 42 | } 43 | } 44 | 45 | impl MacAddr { 46 | pub const BROADCAST: Self = Self { inner: [0xFF; 6] }; 47 | pub const ZERO: Self = Self { inner: [0x00; 6] }; 48 | 49 | /// Return the bytes of the MAC address. 50 | #[inline] 51 | pub fn bytes(&self) -> [u8; 6] { 52 | self.inner 53 | } 54 | 55 | pub const fn from_const(bytes: [u8; 6]) -> Self { 56 | Self { inner: bytes } 57 | } 58 | 59 | /// Return whether this MAC address is a group address (I/G bit set). 60 | /// 61 | /// Per IEEE 802, the I/G (Individual/Group) bit is the LSB of the first octet. 62 | /// When set to 1, the address is a group address, which includes both 63 | /// multicast and broadcast (FF:FF:FF:FF:FF:FF) addresses. 64 | /// 65 | /// See [RFC 7042 §2.1] for details on IEEE 802 MAC address structure. 
66 | /// 67 | /// [RFC 7042 §2.1]: https://www.rfc-editor.org/rfc/rfc7042#section-2.1 68 | pub const fn is_group(&self) -> bool { 69 | (self.inner[0] & 0b0000_0001) != 0 70 | } 71 | } 72 | 73 | impl From for smoltcp::wire::EthernetAddress { 74 | fn from(addr: MacAddr) -> Self { 75 | Self(addr.bytes()) 76 | } 77 | } 78 | 79 | impl From<[u8; 6]> for MacAddr { 80 | fn from(bytes: [u8; 6]) -> Self { 81 | Self { inner: bytes } 82 | } 83 | } 84 | 85 | impl From<&[u8; 6]> for MacAddr { 86 | fn from(bytes: &[u8; 6]) -> Self { 87 | Self { inner: *bytes } 88 | } 89 | } 90 | 91 | impl AsRef<[u8]> for MacAddr { 92 | fn as_ref(&self) -> &[u8] { 93 | &self.inner 94 | } 95 | } 96 | 97 | impl Deref for MacAddr { 98 | type Target = [u8]; 99 | fn deref(&self) -> &Self::Target { 100 | &self.inner 101 | } 102 | } 103 | 104 | impl FromStr for MacAddr { 105 | type Err = String; 106 | 107 | fn from_str(s: &str) -> Result { 108 | let octets: Vec = s 109 | .split(':') 110 | .map(|s| { 111 | u8::from_str_radix(s, 16).map_err(|_| format!("bad octet: {s}")) 112 | }) 113 | .collect::, _>>()?; 114 | 115 | if octets.len() != 6 { 116 | return Err(format!("incorrect number of bytes: {}", octets.len())); 117 | } 118 | 119 | // At the time of writing there is no TryFrom impl for Vec to 120 | // array in the alloc create. Honestly this looks a bit 121 | // cleaner anyways. 122 | let bytes = 123 | [octets[0], octets[1], octets[2], octets[3], octets[4], octets[5]]; 124 | 125 | Ok(MacAddr { inner: bytes }) 126 | } 127 | } 128 | 129 | impl Display for MacAddr { 130 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 131 | write!( 132 | f, 133 | "{:02X}:{:02X}:{:02X}:{:02X}:{:02X}:{:02X}", 134 | self.inner[0], 135 | self.inner[1], 136 | self.inner[2], 137 | self.inner[3], 138 | self.inner[4], 139 | self.inner[5] 140 | ) 141 | } 142 | } 143 | 144 | // There's no reason to view the MAC address as its raw array, so just 145 | // present it in a human-friendly manner. 
146 | impl Debug for MacAddr { 147 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 148 | write!(f, "MacAddr {{ inner: {self} }}") 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /lib/oxide-vpc/src/engine/gateway/icmpv6.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | //! The ICMPv6 implementation of the Virtual Gateway. 8 | 9 | use crate::cfg::Ipv6Cfg; 10 | use crate::cfg::VpcCfg; 11 | use alloc::sync::Arc; 12 | use opte::api::Direction; 13 | use opte::api::Ipv6Addr; 14 | use opte::api::OpteError; 15 | use opte::engine::icmp::v6::Icmpv6EchoReply; 16 | use opte::engine::icmp::v6::NeighborAdvertisement; 17 | use opte::engine::icmp::v6::RouterAdvertisement; 18 | use opte::engine::layer::Layer; 19 | use opte::engine::predicate::Predicate; 20 | use opte::engine::rule::Action; 21 | use opte::engine::rule::Rule; 22 | use smoltcp::wire::Icmpv6Message; 23 | 24 | // Add support for ICMPv6: 25 | // 26 | // - Respond to echo requests from the guest to the gateway. The source address 27 | // may be either any link-local address in guest (since we can't know how they 28 | // generate that) or its assigned VPC-private address. The destination address 29 | // must be the link-local address we derive for OPTE, from the EUI-64 transform 30 | // on its MAC address. 31 | // 32 | // - Respond to NDP Router Solicitations from the guest to the gateway. 33 | // 34 | // - Respond to NDP Neighbor Solicitations from the guest to the gateway. This 35 | // includes solicitations unicast to the gateway, and also delivered to the 36 | // solicited-node multicast group. 
37 | pub fn setup( 38 | layer: &mut Layer, 39 | cfg: &VpcCfg, 40 | ip_cfg: &Ipv6Cfg, 41 | ) -> Result<(), OpteError> { 42 | let dst_ip = Ipv6Addr::from_eui64(&cfg.gateway_mac); 43 | let hairpins = [ 44 | // We need to hairpin echo requests from either the VPC-private or 45 | // link-local address of the guest, to OPTE's link-local. 46 | Action::Hairpin(Arc::new(Icmpv6EchoReply { 47 | src_mac: cfg.guest_mac, 48 | src_ip: ip_cfg.private_ip, 49 | dst_mac: cfg.gateway_mac, 50 | dst_ip, 51 | })), 52 | Action::Hairpin(Arc::new(Icmpv6EchoReply { 53 | src_mac: cfg.guest_mac, 54 | src_ip: Ipv6Addr::from_eui64(&cfg.guest_mac), 55 | dst_mac: cfg.gateway_mac, 56 | dst_ip, 57 | })), 58 | // Map an NDP Router Solicitation from the guest to a Router Advertisement 59 | // from the OPTE virtual gateway's link-local IPv6 address. 60 | Action::Hairpin(Arc::new(RouterAdvertisement::new( 61 | // From the guest's VPC MAC. 62 | cfg.guest_mac, 63 | // The MAC from which we respond, i.e., OPTE's MAC. 64 | cfg.gateway_mac, 65 | // "Managed Configuration", indicating the guest needs to use DHCPv6 to 66 | // acquire an IPv6 address. 67 | true, 68 | ))), 69 | // Map an NDP Neighbor Solicitation from the guest to a neighbor 70 | // advertisement from the OPTE virtual gateway. Note that this is required 71 | // per RFC 4861 so that the guest does not mark the neighbor failed. 72 | Action::Hairpin(Arc::new(NeighborAdvertisement::new( 73 | // From the guest's VPC MAC. 74 | cfg.guest_mac, 75 | // To OPTE's MAC. 76 | cfg.gateway_mac, 77 | // Set the ROUTER flag to true. 78 | true, 79 | // Respond to solicitations from `::` 80 | true, 81 | ))), 82 | ]; 83 | 84 | // UNWRAP SAFETY: There are far fewer than 65535 rules inserted here. 85 | let next_out_prio = u16::try_from(hairpins.len() + 1).unwrap(); 86 | // Add rules for the above actions. 
87 | hairpins.into_iter().enumerate().for_each(|(i, action)| { 88 | let priority = u16::try_from(i + 1).unwrap(); 89 | let rule = Rule::new(priority, action); 90 | layer.add_rule(Direction::Out, rule.finalize()); 91 | }); 92 | 93 | // Filter any uncaught in/out-bound NDP traffic. 94 | let pred = Predicate::Icmpv6MsgType(vec![ 95 | (Icmpv6Message::RouterSolicit.into()..=Icmpv6Message::Redirect.into()) 96 | .into(), 97 | ]); 98 | let in_pred = pred.clone(); 99 | 100 | let mut ndp_filter = Rule::new(next_out_prio, Action::Deny); 101 | ndp_filter.add_predicate(pred); 102 | layer.add_rule(Direction::Out, ndp_filter.finalize()); 103 | 104 | let mut ndp_filter = Rule::new(1, Action::Deny); 105 | ndp_filter.add_predicate(in_pred); 106 | layer.add_rule(Direction::In, ndp_filter.finalize()); 107 | 108 | Ok(()) 109 | } 110 | -------------------------------------------------------------------------------- /bench/src/kbench/mod.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! Utilities used in `cargo kbench`. 8 | 9 | use anyhow::Result; 10 | use serde::Deserialize; 11 | use std::path::Path; 12 | use std::path::PathBuf; 13 | use std::process::Command; 14 | use std::sync::OnceLock; 15 | use workload::IperfConfig; 16 | 17 | pub mod measurement; 18 | pub mod remote; 19 | pub mod workload; 20 | 21 | /// Blocks until a user types the phrase 'exit' on Stdin. 
22 | pub fn loop_til_exit() { 23 | let mut cmd = String::new(); 24 | loop { 25 | match std::io::stdin().read_line(&mut cmd) { 26 | Ok(_) if &cmd == "exit\n" => { 27 | break; 28 | } 29 | Ok(_) => { 30 | println!("wanted exit: saw {cmd:?}"); 31 | cmd.clear(); 32 | } 33 | _ => { 34 | break; 35 | } 36 | } 37 | } 38 | } 39 | 40 | /// Ensure the current process is running as root, or elevate using 41 | /// pfexec if needed. 42 | pub fn elevate() -> Result<()> { 43 | if nix::unistd::Uid::current().is_root() { 44 | Ok(()) 45 | } else { 46 | let my_args = std::env::args(); 47 | let mut elevated = Command::new("pfexec").args(my_args).spawn()?; 48 | let exit_code = elevated.wait()?; 49 | std::process::exit(exit_code.code().unwrap_or(1)) 50 | } 51 | } 52 | 53 | /// Print the given multiline string as a formatted box of text. 54 | pub fn print_banner(text: &str) { 55 | let max_len = text.lines().map(str::len).max().unwrap_or_default(); 56 | 57 | println!("###{:->max_len$}###", ""); 58 | for line in text.lines() { 59 | println!(":::{line:^max_len$}:::"); 60 | } 61 | println!("###{:->max_len$}###", ""); 62 | } 63 | 64 | /// Chown a directory to the original user before pfexec was used (which is 65 | /// unchanged in the env_var USER). 66 | pub fn give_ownership() -> Result<()> { 67 | let Ok(user) = std::env::var("USER") else { return Ok(()) }; 68 | 69 | let criterion_path = cargo_target_directory() 70 | .unwrap_or_else(|| Path::new(".").to_path_buf()) 71 | .join("criterion"); 72 | let outputs = [output_base_dir(), criterion_path.as_path()]; 73 | 74 | for path in outputs { 75 | let _ = Command::new("chown").args(["-R", &user]).arg(path).output()?; 76 | } 77 | 78 | Ok(()) 79 | } 80 | 81 | /// Configures where outputs are stored under various run modes. 
82 | #[derive(Debug, Clone)] 83 | pub enum OutputConfig<'a> { 84 | Iperf(&'a IperfConfig), 85 | InSitu(&'a str), 86 | } 87 | 88 | impl OutputConfig<'_> { 89 | /// Name of an experiment, used for storing different workloads 90 | /// and measurement types in distinct directories. 91 | pub fn benchmark_group(&self) -> String { 92 | match self { 93 | Self::Iperf(i) => i.benchmark_group(), 94 | Self::InSitu(s) => format!("in-situ/{s}"), 95 | } 96 | } 97 | 98 | /// Title to use in a flamegraph built from a set of measurements. 99 | pub fn title(&self) -> String { 100 | match self { 101 | Self::Iperf(i) => i.title(), 102 | Self::InSitu(s) => format!("Local flamegraph -- {s}"), 103 | } 104 | } 105 | } 106 | 107 | impl<'a> From<&'a IperfConfig> for OutputConfig<'a> { 108 | fn from(value: &'a IperfConfig) -> Self { 109 | Self::Iperf(value) 110 | } 111 | } 112 | 113 | // XXX: lifted verbatim from criterion 114 | /// Returns the Cargo target directory, possibly calling `cargo metadata` to 115 | /// figure it out. 116 | pub fn cargo_target_directory() -> Option { 117 | #[derive(Deserialize)] 118 | struct Metadata { 119 | target_directory: PathBuf, 120 | } 121 | 122 | std::env::var_os("CARGO_TARGET_DIR").map(PathBuf::from).or_else(|| { 123 | let output = Command::new(std::env::var_os("CARGO")?) 
124 | .args(["metadata", "--format-version", "1"]) 125 | .output() 126 | .ok()?; 127 | let metadata: Metadata = serde_json::from_slice(&output.stdout).ok()?; 128 | Some(metadata.target_directory) 129 | }) 130 | } 131 | 132 | pub static OUT_DIR: OnceLock = OnceLock::new(); 133 | 134 | pub fn output_base_dir() -> &'static Path { 135 | OUT_DIR 136 | .get_or_init(|| { 137 | let mut out = cargo_target_directory() 138 | .unwrap_or_else(|| Path::new(".").to_path_buf()); 139 | out.push("xde-bench"); 140 | out 141 | }) 142 | .as_path() 143 | } 144 | -------------------------------------------------------------------------------- /crates/opte-api/src/ndp.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2022 Oxide Computer Company 6 | 7 | //! Types for working with the IPv6 Neighbor Discovery Protocol 8 | 9 | use super::Ipv6Addr; 10 | use super::MacAddr; 11 | use core::fmt; 12 | use core::fmt::Debug; 13 | use core::fmt::Display; 14 | 15 | /// A Neighbor Discovery Protocol Router Advertisement, generated in response to 16 | /// a Router Solicitation. 17 | #[derive(Clone, Copy, Debug)] 18 | pub struct RouterAdvertisement { 19 | /// The expected MAC address of the client whose Router Solicitations we 20 | /// respond to. 21 | pub src_mac: MacAddr, 22 | 23 | /// The MAC address advertised by the router. 24 | pub mac: MacAddr, 25 | 26 | // The IPv6 address advertised by the router, which is the EUI-64 transform 27 | // of the router MAC address. 28 | ip: Ipv6Addr, 29 | 30 | /// Managed address configuration, indicating that the peer can use DHCPv6 31 | /// to acquire an IPv6 address. 32 | pub managed_cfg: bool, 33 | } 34 | 35 | impl RouterAdvertisement { 36 | /// Create new `RouterAdvertisement`. 
37 | /// 38 | /// The `src_mac` is the expected source MAC address a Router Solicitation 39 | /// should come from. There are no restrictions on the source IP address, 40 | /// other than that it be link-local, in `fe80::/10`. 41 | /// 42 | /// `mac` is the MAC address of the router, to which Solicitations are 43 | /// expected to be addressed, and from which Advertisements are sent. The 44 | /// source IPv6 address of the Advertisement is derived from this, using the 45 | /// EUI-64 transform. 46 | /// 47 | /// `managed_cfg` is set to `true` to indicate that the host can get further 48 | /// configuration from a DHCPv6 server running on the network. 49 | pub fn new(src_mac: MacAddr, mac: MacAddr, managed_cfg: bool) -> Self { 50 | let ip = Ipv6Addr::from_eui64(&mac); 51 | Self { src_mac, mac, ip, managed_cfg } 52 | } 53 | 54 | /// Return the IPv6 address the router sends advertisements from. 55 | pub fn ip(&self) -> &Ipv6Addr { 56 | &self.ip 57 | } 58 | } 59 | 60 | impl Display for RouterAdvertisement { 61 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 62 | write!(f, "NDP RA IPv6={} MAC={}", self.ip, self.mac) 63 | } 64 | } 65 | 66 | /// A Neighbor Discovery Protocol Neighbor Advertisement, generated in response to 67 | /// a Neighbor Solicitation. 68 | #[derive(Clone, Copy, Debug)] 69 | pub struct NeighborAdvertisement { 70 | /// The expected MAC address of the client whose Neighbor Solicitations we 71 | /// respond to. 72 | pub src_mac: MacAddr, 73 | 74 | /// The MAC address advertised by the neighbor. 75 | pub mac: MacAddr, 76 | 77 | // The advertised IPv6 address of the neighbor, which is the EUI-64 78 | // transform of the source MAC address, in `mac`. 79 | ip: Ipv6Addr, 80 | 81 | /// If true, advertise that this neighbor is a router. 82 | pub is_router: bool, 83 | 84 | /// If true, respond to Neighbor Solicitations sent from the unspecified 85 | /// address `::`, in addition to those from a link-local address 86 | /// `fe80::/10`. 
87 | pub allow_unspec: bool, 88 | } 89 | 90 | impl NeighborAdvertisement { 91 | /// Create new `NeighborAdvertisement`. 92 | /// 93 | /// The `src_mac` is the expected source MAC address a Neighbor Solicitation 94 | /// should come from. There are no restrictions on the source IP address, 95 | /// other than that it be link-local, in `fe80::/10`. 96 | /// 97 | /// `mac` is the MAC address of the neighbor, to which Solicitations are 98 | /// expected to be addressed, and from which Advertisements are sent. The 99 | /// source IPv6 address of the Advertisement is derived from this, using the 100 | /// EUI-64 transform. 101 | /// 102 | /// `is_router` is `true` if the advert should be marked as coming from a 103 | /// router. 104 | /// 105 | /// `allow_unspec` is `true` if the advertisement is generated in response 106 | /// to Neighbor Solicitations from the unspecified address. 107 | pub fn new( 108 | src_mac: MacAddr, 109 | mac: MacAddr, 110 | is_router: bool, 111 | allow_unspec: bool, 112 | ) -> Self { 113 | let ip = Ipv6Addr::from_eui64(&mac); 114 | Self { src_mac, mac, ip, is_router, allow_unspec } 115 | } 116 | 117 | /// Return the IPv6 address the neighbor sends advertisements from. 118 | pub fn ip(&self) -> &Ipv6Addr { 119 | &self.ip 120 | } 121 | } 122 | 123 | impl Display for NeighborAdvertisement { 124 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 125 | write!(f, "NDP NA IPv6={} MAC={}", self.ip, self.mac) 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /lib/opte/src/engine/port/meta.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | use alloc::borrow::Cow; 8 | use alloc::collections::BTreeMap; 9 | use alloc::string::String; 10 | 11 | /// A value meant to be used in the [`ActionMeta`] map. 12 | /// 13 | /// The purpose of this trait is to define the value's key as well 14 | /// as serialization to/from strings. These are like Display and 15 | /// FromStr; but here their focus is on unambiguous parsing. That 16 | /// is, we can't necessarily rely on a type's Display impl being 17 | /// good for serializing to a metadata string, but at the same 18 | /// time we don't want to force its Display to have to work in 19 | /// this constraint. 20 | /// 21 | /// A value doesn't have to implement this type; there is nothing 22 | /// that enforces the strings stored in [`ActionMeta`] are strings 23 | /// generated by this trait impl. It's just a convenient way to 24 | /// mark and implement values meant to be used as action metadata. 25 | pub trait ActionMetaValue: Sized { 26 | const KEY: &'static str; 27 | 28 | fn key(&self) -> Cow<'static, str> { 29 | Cow::Borrowed(Self::KEY) 30 | } 31 | 32 | /// Create a representation of the value to be used in 33 | /// [`ActionMeta`]. 34 | fn as_meta(&self) -> Cow<'static, str>; 35 | 36 | /// Attempt to create a value assuming that `s` was created 37 | /// with [`Self::as_meta()`]. 38 | fn from_meta(s: &str) -> Result; 39 | } 40 | 41 | /// The action metadata map. 42 | /// 43 | /// This metadata is accessible by all actions during layer 44 | /// processing and acts as a form of inter-action communication. 45 | /// Given that packets and their metadata are immutable (outside of 46 | /// reified header transforms), this also allows actions to inform 47 | /// OPTE of facts about a path or destination (e.g., MTU). 48 | /// 49 | /// Action metadata is nothing more than a map of string keys 50 | /// to string values. It is up to the actions to decide what these strings 51 | /// mean. 
However, *all keys prefaced with "opte:" are reserved for use by 52 | /// operations on `ActionMeta`*, and map to functionality in OPTE itself 53 | /// rather than a given dataplane design. 54 | #[derive(Default)] 55 | pub struct ActionMeta { 56 | inner: BTreeMap, Cow<'static, str>>, 57 | } 58 | 59 | impl ActionMeta { 60 | pub fn new() -> Self { 61 | Self::default() 62 | } 63 | 64 | /// Clear all entries. 65 | pub fn clear(&mut self) { 66 | self.inner.clear(); 67 | } 68 | 69 | /// Insert the key-value pair into the map, replacing any 70 | /// existing key-value pair. Return the value being replaced, 71 | /// or `None`. 72 | pub fn insert( 73 | &mut self, 74 | key: Cow<'static, str>, 75 | val: Cow<'static, str>, 76 | ) -> Option> { 77 | self.inner.insert(key, val) 78 | } 79 | 80 | /// Remove the key-value pair with the specified key. Return 81 | /// the value, or `None` if no such entry exists. 82 | pub fn remove(&mut self, key: &str) -> Option> { 83 | self.inner.remove(key) 84 | } 85 | 86 | /// Get a reference to the value with the given key, or `None` 87 | /// if no such entry exists. 88 | pub fn get(&self, key: &str) -> Option<&str> { 89 | self.inner.get(key).map(|v| &**v) 90 | } 91 | 92 | /// Records whether this packet's destination can be reached using only 93 | /// internal/private paths. 94 | /// 95 | /// The dataplane may use this to choose a larger (jumbo-frame) MSS for 96 | /// TCP segmentation, or rely on other aspects of its internal network. 97 | pub fn set_internal_target(&mut self, val: bool) { 98 | _ = self 99 | .insert(InternalTarget::KEY.into(), InternalTarget(val).as_meta()); 100 | } 101 | 102 | /// Returns whether this packet's destination can be reached using only 103 | /// internal/private paths. 
104 | pub fn is_internal_target(&self) -> bool { 105 | self.get(InternalTarget::KEY) 106 | .and_then(|v| InternalTarget::from_meta(v).ok()) 107 | .unwrap_or_default() 108 | .0 109 | } 110 | } 111 | 112 | #[derive(Copy, Clone, Default)] 113 | struct InternalTarget(bool); 114 | 115 | impl ActionMetaValue for InternalTarget { 116 | const KEY: &'static str = "opte:internal-target"; 117 | 118 | fn as_meta(&self) -> Cow<'static, str> { 119 | (if self.0 { "1" } else { "0" }).into() 120 | } 121 | 122 | fn from_meta(s: &str) -> Result { 123 | match s { 124 | "1" => Ok(Self(true)), 125 | "0" => Ok(Self(false)), 126 | s => Err(format!("value `{s}` is illegal for InternalTarget")), 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /lib/opte-test-utils/src/dhcp.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | //! Routines for DHCP testing. 8 | 9 | use super::*; 10 | use dhcpv6::protocol::MessageType; 11 | use opte::ddi::mblk::MsgBlk; 12 | use opte::engine::dhcp::DHCP_CLIENT_PORT; 13 | use opte::engine::dhcp::DHCP_SERVER_PORT; 14 | use opte::engine::dhcpv6; 15 | use opte::engine::ether::Ethernet; 16 | use opte::engine::ip::v4::Ipv4; 17 | use opte::engine::ip::v6::Ipv6; 18 | use opte::ingot::ethernet::Ethertype; 19 | use opte::ingot::ip::IpProtocol; 20 | use opte::ingot::udp::Udp; 21 | pub use smoltcp::wire::DhcpMessageType; 22 | pub use smoltcp::wire::DhcpPacket; 23 | pub use smoltcp::wire::DhcpRepr; 24 | 25 | // Build a packet from a DHCPv4 message, from a client to server. 
26 | pub fn packet_from_client_dhcpv4_message( 27 | cfg: &VpcCfg, 28 | msg: &DhcpRepr, 29 | ) -> MsgBlk { 30 | let eth = Ethernet { 31 | destination: MacAddr::BROADCAST, 32 | source: cfg.guest_mac, 33 | ethertype: Ethertype::IPV4, 34 | }; 35 | 36 | let ip = Ipv4 { 37 | source: Ipv4Addr::ANY_ADDR, 38 | destination: Ipv4Addr::LOCAL_BCAST, 39 | protocol: IpProtocol::UDP, 40 | total_len: (msg.buffer_len() 41 | + Udp::MINIMUM_LENGTH 42 | + Ipv4::MINIMUM_LENGTH) as u16, 43 | ..Default::default() 44 | }; 45 | 46 | let udp = Udp { 47 | source: DHCP_CLIENT_PORT, 48 | destination: DHCP_SERVER_PORT, 49 | length: (Udp::MINIMUM_LENGTH + msg.buffer_len()) as u16, 50 | ..Default::default() 51 | }; 52 | 53 | let headers = (eth, ip, udp); 54 | let total_len = msg.buffer_len() + headers.packet_length(); 55 | 56 | let mut pkt = MsgBlk::new_ethernet(total_len); 57 | pkt.emit_back(&headers).unwrap(); 58 | let dhcp_off = pkt.len(); 59 | pkt.resize(total_len).unwrap(); 60 | let mut dhcp_pkt = DhcpPacket::new_checked(&mut pkt[dhcp_off..]).unwrap(); 61 | msg.emit(&mut dhcp_pkt).unwrap(); 62 | 63 | pkt 64 | } 65 | 66 | // Build a packet from a DHCPv6 message, from a client to server. 
pub fn packet_from_client_dhcpv6_message(
    cfg: &VpcCfg,
    msg: &dhcpv6::protocol::Message<'_>,
) -> MsgBlk {
    let eth = Ethernet {
        destination: dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap(),
        source: cfg.guest_mac,
        ethertype: Ethertype::IPV6,
    };

    let ip = Ipv6 {
        source: Ipv6Addr::from_eui64(&cfg.guest_mac),
        destination: dhcpv6::ALL_RELAYS_AND_SERVERS,
        next_header: IpProtocol::UDP,
        payload_len: (msg.buffer_len() + Udp::MINIMUM_LENGTH) as u16,
        ..Default::default()
    };

    let udp = Udp {
        source: dhcpv6::CLIENT_PORT,
        destination: dhcpv6::SERVER_PORT,
        // The UDP length equals the IPv6 payload length computed above.
        length: ip.payload_len,
        ..Default::default()
    };

    write_dhcpv6_packet(eth, ip, udp, msg)
}

/// Emit the given headers followed by the DHCPv6 message `msg` into a
/// freshly allocated packet.
pub fn write_dhcpv6_packet(
    eth: Ethernet,
    ip: Ipv6,
    udp: Udp,
    msg: &dhcpv6::protocol::Message<'_>,
) -> MsgBlk {
    let headers = (eth, ip, udp);
    let total_len = msg.buffer_len() + headers.packet_length();

    let mut pkt = MsgBlk::new_ethernet(total_len);
    pkt.emit_back(&headers).unwrap();
    // The message body starts where the headers end.
    let dhcp_off = pkt.len();
    pkt.resize(total_len).unwrap();
    msg.copy_into(&mut pkt[dhcp_off..]).unwrap();

    pkt
}

/// Build a client DHCPv6 message of type `typ` with a fixed option set:
/// client ID, elapsed time, an IA_NA request and an option request for
/// DNS servers and the domain list.
///
/// `RapidCommit` is appended when `rapid_commit` is set, and a `ServerId`
/// derived from the gateway MAC is appended for `Request` messages.
pub fn dhcpv6_with_reasonable_defaults(
    typ: MessageType,
    rapid_commit: bool,
    cfg: &VpcCfg,
) -> dhcpv6::protocol::Message<'_> {
    let requested_iana = dhcpv6::options::IaNa {
        id: dhcpv6::options::IaId(0xff7),
        t1: dhcpv6::Lifetime(3600),
        t2: dhcpv6::Lifetime(6200),
        options: vec![],
    };

    let extra_options =
        &[dhcpv6::options::Code::DnsServers, dhcpv6::options::Code::DomainList];
    let oro = dhcpv6::options::OptionRequest(extra_options.as_slice().into());

    // Built once and extended in place; the values are moved rather than
    // cloned since nothing else uses them afterwards.
    let mut options = vec![
        dhcpv6::options::Option::ClientId(dhcpv6::Duid::from(&cfg.guest_mac)),
        dhcpv6::options::Option::ElapsedTime(dhcpv6::options::ElapsedTime(10)),
        dhcpv6::options::Option::IaNa(requested_iana),
        dhcpv6::options::Option::OptionRequest(oro),
    ];

    if rapid_commit {
        options.push(dhcpv6::options::Option::RapidCommit);
    }
    if typ == dhcpv6::protocol::MessageType::Request {
        options.push(dhcpv6::options::Option::ServerId(dhcpv6::Duid::from(
            &cfg.gateway_mac,
        )));
    }

    dhcpv6::protocol::Message {
        typ,
        xid: dhcpv6::TransactionId::from(&[0u8, 1, 2]),
        options,
    }
}

--------------------------------------------------------------------------------
/bench/README.adoc:
--------------------------------------------------------------------------------
= OPTE Benchmarks

OPTE maintains two sets of benchmarks: userland microbenchmarks, and kernel module benchmarks.
Userland benchmarks can be run on most development machines, while the kernel module benchmarks will require a full Helios install and additional lab setup depending on what benchmarks you want to run.

Benchmark outputs are located in `opte/target/criterion`, and any flamegraphs built during kmod benchmarks are placed into `opte/target/xde-bench`.

== Userland Benchmarks

We use https://github.com/bheisler/criterion.rs[`criterion`] to measure and profile individual packet processing times for slow-/fast-path traffic as well as generated hairpin packets.

These can be called using `cargo ubench`, or `cargo bench --package opte-bench --bench userland -- `.
This benchmark runner uses the standard criterion CLI.
To see a clean list of available benchmarks, use the `cargo ubench --list 2> /dev/null | sort | uniq` command.

Benchmarks are split into several categories:

* Metric: `wallclock`, `alloc_ct`, `alloc_sz`.
* Action: `parse`, `process`.
20 | * Packet family. 21 | 22 | == Kernel Module Benchmarks 23 | 24 | The kernel module benchmarks can be called using `cargo kbench`, or `cargo bench --package opte-bench --bench xde -- `. 25 | They require that: 26 | 27 | * you are running on an up-to-date Helios instance. 28 | * the XDE kernel module and `opteadm` are installed, either via IPS or the `cargo xtask install` command. 29 | * you have installed the IPS packages `flamegraph`, `demangle`, `iperf` and `sparse`. 30 | 31 | They implement zone-to-zone iperf traffic in two scenarios: 32 | 33 | * `cargo kbench local` on one machine. 34 | This uses an identical test setup to `xde-tests/loopback`. 35 | Two sparse zones will be created on the current machine, with simnet links being used as an underlay network. 36 | This is lower fidelity than the below two-node setup. 37 | * `cargo kbench server` and `cargo kbench remote ` on two separate machines. 38 | One zone will be created on each machine (running an iperf server and client respectively), using the shared lab/home network to exchange link local addresses. 39 | 40 | Below you can find a lab setup which suffices for the second option. 41 | Currently, linklocals must be created with the name syntax `<nic>/ll`: this can be done using, e.g., `pfexec ipadm create-addr igb0/ll -T addrconf`. 42 | The benchmark defaults to using the NICs `igb0` and `igb1`, and can be overridden to match your setup using the `--underlay-nics` option. 43 | E.g., when testing over a Chelsio NIC `--underlay-nics cxgbe0 cxgbe1` will select these devices and use the link-local addresses `cxgbe0/ll` and `cxgbe1/ll`. 44 | Additionally, MTUs should be set to `9000` for physical underlay links. 
45 | 46 | [source] 47 | fe80::a236:9fff:fe0c:2586 fe80::a236:9fff:fe0c:25b6 48 | fe80::a236:9fff:fe0c:2587 fe80::a236:9fff:fe0c:25b7 49 | ┌─────────────────────────────────────┐ 50 | │ │ 51 | │ ┌─────────────────┐ │ 52 | │ │ │ │ 53 | igb0┌┴┐ ┌┴┐igb1 igb1┌┴┐ ┌┴┐igb0 54 | ╔═╩═╩═══════╩═╩═╗ ╔═╩═╩═══════╩═╩═╗ 55 | ║ cargo kbench ║░ ║ cargo kbench ║░ 56 | ║ remote ║░ ║ server ║░ 57 | ║ 10.0.125.173 ║░ ║ ║░ 58 | ╚══════╦═╦══════╝░ ╚══════╦═╦══════╝░ 59 | ░░░░░░░│░░░░░░░░░ ░░░░░░░│░░░░░░░░░ 60 | 10.0.147.187/8 10.0.125.173/8 61 | │ ┌ ─ ─ ─ ─ ─ ┐ │ 62 | Lab/Home 63 | └ ─ ─ ▶│ Network │◀ ─ ─ ─ ┘ 64 | ─ ─ ─ ─ ─ ─ 65 | 66 | Connecting `igb0<->igb0`, etc., is not a requirement, as NDP tables are inspected for inserting underlay network routes. 67 | 68 | In both scenarios, the benchmark harness will run iperf in client-to-server and server-to-client modes, and will record periodic stack information and timings using `dtrace`. 69 | These are converted into flamegraphs and timing data for further analysis by criterion. 70 | 71 | === In-situ measurement 72 | 73 | The kernel module benchmark harness can be moved onto a gimlet or other development system for measurement. 74 | The path to the binary can be found using the command: 75 | 76 | [source, bash] 77 | ---- 78 | cargo bench --package opte-bench \ 79 | --no-run --message-format json-render-diagnostics \ 80 | | jq -r -s "map( \ 81 | select(.reason==\"compiler-artifact\") \ 82 | | select( \ 83 | .target.kind\ 84 | | map_values(.==\"bench\") \ 85 | | any \ 86 | ) \ 87 | | select(.target.name==\"xde\") \ 88 | ) | map(.executable)" 89 | ---- 90 | 91 | Once the binary is moved onto the global zone of a target machine, measurements can be taken using `xde in-situ`. 92 | On a gimlet we add the `-d` flag as we do not have access to `flamegraph`. 93 | This places captured stacks into the `xde-bench` folder. 94 | 95 | [source, bash] 96 | ---- 97 | $ ./xde in-situ expt-name -d 98 | # ... 
99 | exit 100 | 101 | $ ls -R xde-bench 102 | xde-bench: 103 | expt-name 104 | 105 | xde-bench/expt-name: 106 | histos.out raw.stacks 107 | 108 | ---- 109 | 110 | Measured data in `xde-bench` can be moved and processed into flamegraphs and histograms on any development machine using the command `./xde in-situ expt-name -c none`. 111 | -------------------------------------------------------------------------------- /xde/src/lib.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | // xde - A mac provider for OPTE-based network implementations. 8 | #![feature(extern_types)] 9 | #![no_std] 10 | #![allow(non_upper_case_globals)] 11 | // XXX We do not use double in the kernel. We should not allow 12 | // "improper C types". This hack is here because of the ip.rs code 13 | // generated by bindgen. It brings in a bunch of stuff we do not use. 14 | // At some point we could hand write the stuff that is actually 15 | // needed, or come up with a better solution like using CTF data to 16 | // generate Rust types for only the stuff we need. 
17 | #![allow(improper_ctypes)] // for long double -> u128 18 | #![allow(non_camel_case_types)] // for bindgen code in ip.rs 19 | #![allow(non_snake_case)] // for bindgen code in ip.rs 20 | #![feature(alloc_error_handler)] 21 | #![feature(rustc_private)] 22 | #![feature(maybe_uninit_slice)] 23 | #![deny(unused_must_use)] 24 | 25 | mod ioctl; 26 | 27 | #[macro_use] 28 | extern crate alloc; 29 | 30 | use alloc::ffi::CString; 31 | use core::alloc::GlobalAlloc; 32 | use core::alloc::Layout; 33 | use core::panic::PanicInfo; 34 | use illumos_sys_hdrs::CE_WARN; 35 | use illumos_sys_hdrs::KM_SLEEP; 36 | use illumos_sys_hdrs::c_void; 37 | use illumos_sys_hdrs::cmn_err; 38 | use illumos_sys_hdrs::kmem_alloc; 39 | use illumos_sys_hdrs::kmem_free; 40 | use illumos_sys_hdrs::panic; 41 | use illumos_sys_hdrs::size_t; 42 | 43 | pub mod dev_map; 44 | pub mod dls; 45 | pub mod ip; 46 | pub mod mac; 47 | pub mod postbox; 48 | pub mod route; 49 | pub mod secpolicy; 50 | pub mod stats; 51 | pub mod sys; 52 | pub mod xde; 53 | 54 | // On alignment, `kmem_alloc(9F)` has this to offer: 55 | // 56 | // > The allocated memory is at least double-word aligned, so it can 57 | // > hold any C data structure. No greater alignment can be assumed. 58 | // 59 | // I really hate when documentation uses "word", because that seems to 60 | // mean different things in different contexts. In this case I have to 61 | // assume it means native integer size, or 32-bit in the case of our 62 | // AMD64 kernel. This implies all allocations are at least 8-byte 63 | // aligned, but could be more. However, the last sentence in the quote 64 | // above says that you cannot assume alignment is ever greater than 8 65 | // bytes. Therefore, it seems best to assume it's 8 bytes. For the 66 | // purposes of implementing GlobalAlloc, I believe this means that we 67 | // should return NULL for any Layout which requests more than 8-byte 68 | // alignment (or probably just panic since I never expect this). 
69 | // Furthermore, things that can use smaller alignment will just have 70 | // to live with the larger alignment. 71 | struct KmemAlloc; 72 | 73 | unsafe impl GlobalAlloc for KmemAlloc { 74 | unsafe fn alloc(&self, layout: Layout) -> *mut u8 { 75 | if layout.align() > 8 { 76 | panic!("kernel alloc greater than 8-byte alignment"); 77 | } 78 | unsafe { kmem_alloc(layout.size(), KM_SLEEP) as *mut u8 } 79 | } 80 | 81 | unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { 82 | unsafe { kmem_free(ptr as *mut c_void, layout.size() as size_t) } 83 | } 84 | } 85 | 86 | #[panic_handler] 87 | fn panic_hdlr(info: &PanicInfo) -> ! { 88 | let msg = CString::new(format!("{info}")).expect("cstring new"); 89 | unsafe { 90 | cmn_err(CE_WARN, msg.as_ptr()); 91 | panic(msg.as_ptr()); 92 | } 93 | } 94 | 95 | // The GlobalAlloc is using KM_SLEEP; we can never hit this. However, the 96 | // compiler forces us to define it, so we do. 97 | #[alloc_error_handler] 98 | fn alloc_error(_: Layout) -> ! { 99 | panic!("allocation error"); 100 | } 101 | 102 | #[global_allocator] 103 | static A: KmemAlloc = KmemAlloc; 104 | 105 | // This is a hack to get around the fact that liballoc includes 106 | // calls to _Unwind_Resume, supposedly because it is not compiled 107 | // with `panic=abort`. This is all a little bit beyond me but I just 108 | // want to satisfy the symbol resolution so I can load this module. 109 | // 110 | // https://github.com/rust-lang/rust/issues/47493 111 | #[allow(non_snake_case)] 112 | #[unsafe(no_mangle)] 113 | fn _Unwind_Resume() -> ! { 114 | panic!("_Unwind_Resume called"); 115 | } 116 | 117 | // NOTE: We allow unused_unsafe so these macros can be used freely in 118 | // unsafe and non-unsafe functions. 119 | #[macro_export] 120 | macro_rules! 
warn { 121 | ($format:expr) => { 122 | let msg = ::alloc::ffi::CString::new(format!($format)).unwrap(); 123 | #[allow(unused_unsafe)] 124 | unsafe { ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_WARN, msg.as_ptr()) }; 125 | }; 126 | ($format:expr, $($args:expr),*) => { 127 | let msg = ::alloc::ffi::CString::new(format!($format, $($args),*)).unwrap(); 128 | #[allow(unused_unsafe)] 129 | unsafe { ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_WARN, msg.as_ptr()) }; 130 | }; 131 | } 132 | 133 | #[macro_export] 134 | macro_rules! note { 135 | ($format:expr) => { 136 | let msg = ::alloc::ffi::CString::new(format!($format)); 137 | ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_NOTE, msg.as_ptr()); 138 | }; 139 | ($format:expr, $($args:expr),*) => { 140 | let msg = ::alloc::ffi::CString::new(format!($format, $($args),*)); 141 | ::illumos_sys_hdrs::cmn_err(::illumos_sys_hdrs::CE_NOTE, msg.as_ptr()); 142 | }; 143 | } 144 | -------------------------------------------------------------------------------- /bench/src/kbench/workload.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2024 Oxide Computer Company 6 | 7 | use super::*; 8 | use measurement::Instrumentation; 9 | 10 | #[allow(dead_code)] 11 | #[derive(Debug, Clone)] 12 | pub enum IperfMode { 13 | ClientSend, 14 | ServerSend, 15 | // TODO: need an updated illumos package. 16 | // we can build and install locally and just call 17 | // /usr/local/iperf3 if need be. 
18 | BiDir, 19 | } 20 | 21 | impl std::fmt::Display for IperfMode { 22 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 23 | f.write_str(match self { 24 | IperfMode::ClientSend => "Client->Server", 25 | IperfMode::ServerSend => "Server->Client", 26 | IperfMode::BiDir => "Bidirectional", 27 | }) 28 | } 29 | } 30 | 31 | impl Default for IperfMode { 32 | fn default() -> Self { 33 | Self::ClientSend 34 | } 35 | } 36 | 37 | #[allow(dead_code)] 38 | #[derive(Debug, Clone)] 39 | pub enum IperfProto { 40 | Tcp, 41 | Udp { 42 | /// Target bandwidth in MiB/s. 43 | bw: f64, 44 | /// Size of the UDP send buffer. 45 | /// 46 | /// Should be under 1500 due to dont_fragment. 47 | pkt_sz: usize, 48 | }, 49 | } 50 | 51 | impl std::fmt::Display for IperfProto { 52 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 53 | match self { 54 | IperfProto::Tcp => f.write_str("TCP"), 55 | IperfProto::Udp { bw, pkt_sz } => { 56 | write!(f, "UDP({pkt_sz}B, {bw}MiB/s)") 57 | } 58 | } 59 | } 60 | } 61 | 62 | impl Default for IperfProto { 63 | fn default() -> Self { 64 | Self::Tcp 65 | } 66 | } 67 | 68 | #[derive(Debug, Clone)] 69 | pub struct IperfConfig { 70 | pub instrumentation: Instrumentation, 71 | pub n_iters: usize, 72 | pub mode: IperfMode, 73 | pub proto: IperfProto, 74 | pub expt_name: String, 75 | pub n_streams: Option, 76 | } 77 | 78 | impl Default for IperfConfig { 79 | fn default() -> Self { 80 | Self { 81 | instrumentation: Instrumentation::Dtrace, 82 | n_iters: 10, 83 | mode: IperfMode::default(), 84 | proto: IperfProto::default(), 85 | expt_name: "unspec".into(), 86 | n_streams: None, 87 | } 88 | } 89 | } 90 | 91 | impl IperfConfig { 92 | /// Return the command 93 | pub fn cmd_str(&self, target_ip: &str) -> String { 94 | let proto_str; 95 | let proto_segment = match self.proto { 96 | IperfProto::Tcp => "", 97 | IperfProto::Udp { bw, pkt_sz } => { 98 | proto_str = format!("-u --length {pkt_sz} -b {bw}M"); 99 | proto_str.as_str() 100 | } 
101 | }; 102 | let dir_segment = match self.mode { 103 | IperfMode::ClientSend => "", 104 | IperfMode::ServerSend => "-R", 105 | IperfMode::BiDir => "--bidir", 106 | }; 107 | 108 | let n_streams = self.n_streams.unwrap_or(8); 109 | 110 | // XXX: Setting several parallel streams because we don't 111 | // really have packet-wise ECMP yet from ddm -- the 112 | // P-values won't change, so the flowkey remains the same. 113 | // XXX: At higher rates -P instead of n x iPerf servers will 114 | // bottleneck us. This is fine at ~2Gbps, but will need 115 | // rework in future. 116 | format!( 117 | "iperf -c {target_ip} -J -P {n_streams} {proto_segment} {dir_segment}" 118 | ) 119 | } 120 | 121 | /// Name of an experiment, used for storing different workloads 122 | /// and measurement types in distinct directories. 123 | pub fn benchmark_group(&self) -> String { 124 | format!( 125 | "iperf-{}/{}/{}", 126 | match self.proto { 127 | IperfProto::Tcp => "tcp", 128 | IperfProto::Udp { .. } => "udp", 129 | }, 130 | self.expt_name, 131 | match self.mode { 132 | IperfMode::ClientSend => "c2s", 133 | IperfMode::ServerSend => "s2c", 134 | IperfMode::BiDir => "bidir", 135 | } 136 | ) 137 | } 138 | 139 | /// Title to use in a flamegraph built from a set of measurements. 140 | pub fn title(&self) -> String { 141 | format!("iperf3 ({}) -- {}", self.mode, self.proto) 142 | } 143 | } 144 | 145 | // TODO: want these as json somewhere, with command line options 146 | // to choose which are run. 147 | pub fn base_experiments(expt_name: &str) -> Vec { 148 | let base = 149 | IperfConfig { expt_name: expt_name.to_string(), ..Default::default() }; 150 | vec![ 151 | // lockstat: (almost) raw speeds. 
152 | IperfConfig { 153 | instrumentation: Instrumentation::Lockstat, 154 | n_iters: 5, 155 | mode: IperfMode::ClientSend, 156 | ..base.clone() 157 | }, 158 | IperfConfig { 159 | instrumentation: Instrumentation::Lockstat, 160 | n_iters: 5, 161 | mode: IperfMode::ServerSend, 162 | ..base.clone() 163 | }, 164 | // dtrace: collect all the stats! 165 | IperfConfig { mode: IperfMode::ClientSend, ..base.clone() }, 166 | IperfConfig { mode: IperfMode::ServerSend, ..base.clone() }, 167 | ] 168 | } 169 | -------------------------------------------------------------------------------- /crates/derror-macro/src/lib.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // Copyright 2025 Oxide Computer Company 6 | 7 | use darling::FromDeriveInput; 8 | use proc_macro2::TokenStream; 9 | use quote::format_ident; 10 | use quote::quote; 11 | use syn::DeriveInput; 12 | use syn::parse_macro_input; 13 | 14 | #[derive(FromDeriveInput)] 15 | #[darling(attributes(derror))] 16 | struct Args { 17 | leaf_data: Option, 18 | } 19 | 20 | /// Generate a `DError` implementation given a tree-structured enum 21 | /// where only leaf nodes hold additional data. 22 | /// 23 | /// This allows for deeply nested enums to be more easily understood in 24 | /// dtrace probes without calling `format!()`. 
25 | /// 26 | /// This is intended for annotating error chains such as: 27 | /// ```ignore 28 | /// #[derive(DError)] 29 | /// enum SomeErrors { 30 | /// A, 31 | /// B(NestedError), 32 | /// } 33 | /// 34 | /// #[derive(DError)] 35 | /// #[derror(leaf_data = data_fn)] 36 | /// enum NestedError { 37 | /// Data1 { val1: u64, val2: u8}, 38 | /// #[leaf] 39 | /// Data2(u32), 40 | /// NoData, 41 | /// } 42 | /// 43 | /// fn data_fn(val: &NestedError, data: &mut [u8]) { 44 | /// [data[0], data[1]] = match { 45 | /// Self::Data1 { val1, val2 } => [val1 as u64, val2 as u64], 46 | /// Self::Data2(d) => [d as u64, 0], 47 | /// _ => [0, 0], 48 | /// } 49 | /// } 50 | /// ``` 51 | /// The macro will automatically generate `CStrs` for every enum variant 52 | /// and will traverse down all single-element tuple variants unless annotated 53 | /// as `#[leaf]`s. A `leaf_data` function can be specfied to fill in the data 54 | /// segment of an `ErrorBlock`. This is currently fixed as a `[u64; 2]`. 55 | #[proc_macro_derive(DError, attributes(derror, leaf))] 56 | pub fn derive_derror( 57 | input: proc_macro::TokenStream, 58 | ) -> proc_macro::TokenStream { 59 | let derive_input = parse_macro_input!(input); 60 | 61 | let parsed_args = match Args::from_derive_input(&derive_input) { 62 | Ok(o) => o, 63 | Err(e) => return e.write_errors().into(), 64 | }; 65 | 66 | let DeriveInput { ident, data, .. 
} = derive_input; 67 | 68 | let syn::Data::Enum(data) = data else { 69 | panic!("cannot autoderive `DError` for struct or union"); 70 | }; 71 | 72 | let mut cstr_decls: Vec = vec![]; 73 | let mut cstr_arms: Vec = vec![]; 74 | let mut child_arms: Vec = vec![]; 75 | 76 | for pair in data.variants.into_pairs() { 77 | let variant = pair.into_value(); 78 | let var_name = variant.ident; 79 | 80 | let static_name = format_ident!("{}_cstr", var_name); 81 | let mut var_name_bytes = var_name.to_string().into_bytes(); 82 | var_name_bytes.push(0); 83 | let static_name_val = 84 | syn::LitByteStr::new(&var_name_bytes, var_name.span()); 85 | 86 | // TODO: use c"" once proc_macro_c_str_literals (https://github.com/rust-lang/rust/issues/119750) stabilised. 87 | cstr_decls.push(quote! { 88 | static #static_name: &CStr = if let Ok(s) = CStr::from_bytes_with_nul(#static_name_val) { 89 | s 90 | } else { 91 | panic!("Bad cstring constant!") 92 | }; 93 | }); 94 | 95 | let known_leaf = 96 | variant.attrs.iter().any(|v| v.path().is_ident("leaf")); 97 | 98 | let (cstr_block, child_block) = match variant.fields { 99 | syn::Fields::Unnamed(fields) => ( 100 | quote! {Self::#var_name(f) => #static_name,}, 101 | if !known_leaf && fields.unnamed.len() == 1 { 102 | quote! { 103 | Self::#var_name(f) => Some(f), 104 | } 105 | } else { 106 | quote! { 107 | Self::#var_name(..) => None, 108 | } 109 | }, 110 | ), 111 | syn::Fields::Named(_) => ( 112 | quote! { 113 | Self::#var_name{ .. } => #static_name, 114 | }, 115 | quote! { 116 | Self::#var_name{ .. } => None, 117 | }, 118 | ), 119 | syn::Fields::Unit => ( 120 | quote! { 121 | Self::#var_name => #static_name, 122 | }, 123 | quote! { 124 | Self::#var_name => None, 125 | }, 126 | ), 127 | }; 128 | 129 | cstr_arms.push(cstr_block); 130 | child_arms.push(child_block); 131 | } 132 | 133 | let leaf_data_impl = if let Some(data_fn) = parsed_args.leaf_data { 134 | quote! 
{ 135 | fn leaf_data(&self, data: &mut [u64]) { 136 | #data_fn(self, data); 137 | } 138 | } 139 | } else { 140 | quote! {} 141 | }; 142 | 143 | quote! { 144 | impl DError for #ident { 145 | #[allow(non_upper_case_globals)] 146 | #[inline] 147 | fn discriminant(&self) -> &'static ::core::ffi::CStr { 148 | use ::core::ffi::CStr; 149 | #( #cstr_decls )* 150 | match self { 151 | #( #cstr_arms )* 152 | } 153 | } 154 | 155 | #[inline] 156 | fn child(&self) -> Option<&dyn DError> { 157 | match self { 158 | #( #child_arms )* 159 | } 160 | } 161 | 162 | #leaf_data_impl 163 | } 164 | } 165 | .into() 166 | } 167 | -------------------------------------------------------------------------------- /xde/xde-link/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_camel_case_types)] 2 | #![no_std] 3 | 4 | #[panic_handler] 5 | fn panic(_: &core::panic::PanicInfo) -> ! { 6 | #[link(name = "c")] 7 | unsafe extern "C" { 8 | fn abort() -> !; 9 | } 10 | unsafe { abort() } 11 | } 12 | 13 | // devfsadm expects 2 symbols to be exported: 14 | // - _devfsadm_create_reg: link creation registration 15 | // - _devfsadm_remove_reg: link removal registration 16 | 17 | /// devfsadm plugin link creation registration 18 | /// Exported plugin entry point for 19 | #[unsafe(no_mangle)] 20 | pub static _devfsadm_create_reg: _devfsadm_create_reg_t = 21 | _devfsadm_create_reg_t { 22 | version: DEVFSADM_V0, 23 | count: 1, 24 | tblp: &devfsadm_create_t { 25 | device_class: c"pseudo".as_ptr(), 26 | node_type: DDI_PSEUDO, 27 | drv_name: c"xde".as_ptr(), 28 | flags: TYPE_EXACT | DRV_EXACT, 29 | interpose_lvl: ILEVEL_0, 30 | callback_fcn: create_xde_link, 31 | }, 32 | }; 33 | 34 | /// devfsadm plugin link removal registration 35 | #[unsafe(no_mangle)] 36 | pub static _devfsadm_remove_reg: _devfsadm_remove_reg_t = 37 | _devfsadm_remove_reg_t { 38 | version: DEVFSADM_V0, 39 | count: 1, 40 | tblp: &devfsadm_remove_t { 41 | device_class: c"pseudo".as_ptr(), 
42 | dev_dirs_re: c"^xde$".as_ptr(), 43 | flags: RM_HOT | RM_PRE | RM_ALWAYS, 44 | interpose_lvl: ILEVEL_0, 45 | callback_fcn: devfsadm_rm_all, 46 | }, 47 | }; 48 | 49 | /// Create xde /dev link for the control device 50 | /// /dev/xde => /devices/pseudo/xde@0:ctl 51 | unsafe extern "C" fn create_xde_link( 52 | minor: *const di_minor, 53 | node: *const di_node, 54 | ) -> c_int { 55 | #[link(name = "c")] 56 | unsafe extern "C" { 57 | fn strcmp(s1: *const c_char, s2: *const c_char) -> c_int; 58 | } 59 | unsafe { 60 | if strcmp(di_minor_name(minor), c"ctl".as_ptr()) == 0 { 61 | devfsadm_mklink(c"xde".as_ptr(), node, minor, 0); 62 | } 63 | } 64 | 0 65 | } 66 | 67 | use core::ffi::c_char; 68 | use core::ffi::c_int; 69 | use core::ffi::c_uint; 70 | 71 | /// devfsadm plugin interface version 0 72 | pub const DEVFSADM_V0: c_uint = 0; 73 | 74 | /// Create /dev link at the root 75 | pub const ILEVEL_0: c_int = 0; 76 | 77 | /// Match minor node type exactly 78 | pub const TYPE_EXACT: c_int = 0x01; 79 | 80 | /// Match driver name exactly 81 | pub const DRV_EXACT: c_int = 0x10; 82 | 83 | /// Remove /dev link when device is hot-removed 84 | pub const RM_HOT: c_int = 0x01; 85 | 86 | /// Remove /dev link before processing entire devinfo tree 87 | pub const RM_PRE: c_int = 0x02; 88 | 89 | /// Remove /dev link even if cleanup wasn't requested 90 | pub const RM_ALWAYS: c_int = 0x08; 91 | 92 | /// Minor node type for pseudo devices 93 | pub const DDI_PSEUDO: *const c_char = c"ddi_pseudo".as_ptr(); 94 | 95 | /// Opaque minor node handle 96 | type di_minor = core::ffi::c_void; 97 | 98 | /// Opaque device node handle 99 | type di_node = core::ffi::c_void; 100 | 101 | // See lib/libdevinfo/libdevinfo.h 102 | #[link(name = "devinfo")] 103 | unsafe extern "C" { 104 | /// Returns name for the given minor node 105 | fn di_minor_name(minor: *const di_minor) -> *const c_char; 106 | } 107 | 108 | // These symbols exist in the `devfsadm` binary itself which is the one 109 | // that will be 
`dlopen()`'ing the plugin. 110 | unsafe extern "C" { 111 | fn devfsadm_mklink( 112 | link: *const c_char, 113 | node: *const di_node, 114 | minor: *const di_minor, 115 | flags: c_int, 116 | ) -> c_int; 117 | fn devfsadm_rm_all(file: *const c_char); 118 | } 119 | 120 | /// Predicates used to match a device and how to create its /dev link 121 | /// 122 | /// cmd/devfsadm/devfsadm.h 123 | #[repr(C)] 124 | struct devfsadm_create_t { 125 | /// Device class to match (e.g. "pseudo", "disk") 126 | device_class: *const c_char, 127 | /// Minor node type to match (e.g. DDI_PSEUDO, DDI_NT_BLOCK) 128 | node_type: *const c_char, 129 | /// Driver name to match 130 | drv_name: *const c_char, 131 | /// Flags to control matching 132 | flags: c_int, 133 | /// Level at which to create /dev/ link 134 | /// (e.g. ILEVEL_0, ILEVEL_1, ILEVEL_2) 135 | interpose_lvl: c_int, 136 | /// Callback to create /dev/ link 137 | callback_fcn: 138 | unsafe extern "C" fn(*const di_minor, *const di_node) -> c_int, 139 | } 140 | 141 | /// devfsadm plugin link creation registration information 142 | #[repr(C)] 143 | pub struct _devfsadm_create_reg_t { 144 | /// devfsadm plugin interface version 145 | version: c_uint, 146 | /// Number of entries in the creation entry table (`tblp`) 147 | count: c_uint, 148 | /// Table of link creation entries 149 | tblp: *const devfsadm_create_t, 150 | } 151 | // SAFETY: Sync is required to stick this in a static. 152 | // The only non-Sync field is `tblp` which is never modified. 153 | unsafe impl Sync for _devfsadm_create_reg_t {} 154 | 155 | /// Predicates used to match a device and how to remove its /dev link 156 | #[repr(C)] 157 | struct devfsadm_remove_t { 158 | /// Device class to match (e.g. 
"pseudo", "disk") 159 | device_class: *const c_char, 160 | /// Regex to match /dev directories 161 | dev_dirs_re: *const c_char, 162 | /// Flags to control removal 163 | flags: c_int, 164 | /// /dev/ dir nesting level 165 | interpose_lvl: c_int, 166 | /// Callback to remove /dev/ links 167 | callback_fcn: unsafe extern "C" fn(*const c_char), 168 | } 169 | 170 | /// devfsadm plugin link deletion registration information 171 | #[repr(C)] 172 | pub struct _devfsadm_remove_reg_t { 173 | /// devfsadm plugin interface version 174 | version: c_uint, 175 | /// Number of entries in the removal entry table (`tblp`) 176 | count: c_uint, 177 | /// Table of link removal entries 178 | tblp: *const devfsadm_remove_t, 179 | } 180 | // SAFETY: Sync is required to stick this in a static. 181 | // The only non-Sync field is `tblp` which is never modified. 182 | unsafe impl Sync for _devfsadm_remove_reg_t {} 183 | --------------------------------------------------------------------------------