├── benchs ├── bench_util │ ├── src │ │ ├── doca │ │ │ ├── mod.rs │ │ │ └── args.rs │ │ ├── doorbell │ │ │ ├── mod.rs │ │ │ ├── rc_doorbell.rs │ │ │ ├── ud_doorbell.rs │ │ │ ├── doorbell.rs │ │ │ └── recv_doorbell.rs │ │ ├── rdtsc.rs │ │ ├── ud_message.rs │ │ ├── lib.rs │ │ ├── ud_manager.rs │ │ ├── args.rs │ │ └── ud_endpoint.rs │ └── Cargo.toml ├── Cargo.toml ├── one_sided_rdma │ ├── Cargo.toml │ └── src │ │ ├── main.rs │ │ └── bootstrap │ │ ├── server_construct.rs │ │ ├── mod.rs │ │ └── client_construct.rs ├── two_sided_rdma │ ├── Cargo.toml │ └── src │ │ ├── main.rs │ │ └── bootstrap │ │ ├── mod.rs │ │ ├── server_construct.rs │ │ └── client_construct.rs ├── doca_dma │ ├── Cargo.toml │ └── src │ │ ├── main.rs │ │ └── bootstrap │ │ ├── connection.rs │ │ ├── mod.rs │ │ ├── server_construct.rs │ │ └── client_construct.rs ├── docs │ ├── README.md │ ├── dma.md │ ├── doca_dma.md │ ├── one_sided_rdma.md │ └── two_sided_rdma.md └── rdma_util │ └── src │ ├── rc_doorbell.rs │ ├── ud_doorbell.rs │ └── recv_doorbell.rs ├── .gitignore ├── .gitmodules ├── netbencher_core ├── Cargo.toml ├── examples │ ├── report_worker_stats.rs │ ├── coordinator_report_master.rs │ └── coordinator_report_worker.rs ├── src │ ├── reporter │ │ ├── simple_reporter.rs │ │ ├── mod.rs │ │ └── coordinated_reporter.rs │ └── lib.rs └── README.md ├── README.md └── makefile /benchs/bench_util/src/doca/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod args; -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/target/ 2 | **/Cargo.lock 3 | bins/ 4 | **/results/ 5 | .vscode/ -------------------------------------------------------------------------------- /benchs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ 4 | 'bench_util', 5 | 'one_sided_rdma', 6 | 
'two_sided_rdma', 7 | 'doca_dma', 8 | ] -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "deps/krcore"] 2 | path = deps/krcore 3 | url = https://github.com/SJTU-IPADS/krcore-artifacts.git 4 | branch = develop 5 | [submodule "deps/rust-doca"] 6 | path = deps/rust-doca 7 | url = https://github.com/smartnickit-project/rust-doca.git 8 | branch = main 9 | -------------------------------------------------------------------------------- /benchs/bench_util/src/doorbell/mod.rs: -------------------------------------------------------------------------------- 1 | mod doorbell; 2 | pub use doorbell::DoorbellHelper; 3 | 4 | mod rc_doorbell; 5 | pub use rc_doorbell::RcDoorbellHelper; 6 | 7 | mod ud_doorbell; 8 | pub use ud_doorbell::UdDoorbellHelper; 9 | 10 | mod recv_doorbell; 11 | pub use recv_doorbell::RecvDoorbellHelper; -------------------------------------------------------------------------------- /netbencher_core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "netbencher_core" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | tokio = { version = "1.20.1", features = ["full"]} 10 | serde = "1.0.144" 11 | serde_derive = "1.0.144" 12 | serde_json = "1.0.85" 13 | log = { version = "*"} 14 | simplelog = "^0.12.0" 15 | clap = "4.1.1" -------------------------------------------------------------------------------- /benchs/bench_util/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bench_util" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | clap = { version = "4.1.1", features = ["derive"] } 8 | rand = "0.8.5" 9 | rand_chacha = "*" 10 | tokio = { version = 
"1.20.1", features = ["full"]} 11 | KRdmaKit = { path = "../../deps/krcore/KRdmaKit", features = ["user"] } 12 | serde = "1.0.144" 13 | serde_derive = "1.0.144" 14 | serde_json = "1.0.85" 15 | log = { version = "*"} 16 | simplelog = "^0.12.0" 17 | 18 | [features] 19 | OFED_5_4 = ["KRdmaKit/OFED_5_4"] 20 | ARM = [] -------------------------------------------------------------------------------- /benchs/one_sided_rdma/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "one_sided_rdma" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | KRdmaKit = { path = "../../deps/krcore/KRdmaKit", features = ["user"] } 8 | bench_util = { path = "../bench_util" } 9 | clap = { version = "4.1.1", features = ["derive"] } 10 | netbencher_core = { path = "../../netbencher_core" } 11 | rand = "0.8.5" 12 | rand_chacha = "*" 13 | log = { version = "*"} 14 | simplelog = "^0.12.0" 15 | tokio = { version = "1.20.1", features = ["full"]} 16 | ctrlc = "3.2.3" 17 | 18 | [features] 19 | OFED_5_4 = ["KRdmaKit/OFED_5_4", "bench_util/OFED_5_4"] 20 | ARM = ["bench_util/ARM"] -------------------------------------------------------------------------------- /benchs/two_sided_rdma/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "two_sided_rdma" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | KRdmaKit = { path = "../../deps/krcore/KRdmaKit", features = ["user"] } 8 | bench_util = { path = "../bench_util" } 9 | clap = { version = "4.1.1", features = ["derive"] } 10 | netbencher_core = { path = "../../netbencher_core" } 11 | rand = "0.8.5" 12 | rand_chacha = "*" 13 | log = { version = "*"} 14 | simplelog = "^0.12.0" 15 | tokio = { version = "1.20.1", features = ["full"]} 16 | ctrlc = "3.2.3" 17 | 18 | [features] 19 | OFED_5_4 = ["KRdmaKit/OFED_5_4", "bench_util/OFED_5_4"] 20 | ARM = ["bench_util/ARM"] 21 | 
-------------------------------------------------------------------------------- /benchs/bench_util/src/rdtsc.rs: -------------------------------------------------------------------------------- 1 | use core::arch::x86_64::_rdtsc; 2 | use std::{thread, time}; 3 | use log::info; 4 | 5 | #[inline] 6 | pub fn get_rdtsc() -> u64 { 7 | unsafe { _rdtsc() } 8 | } 9 | 10 | #[inline] 11 | pub fn get_one_sec_rdtsc() -> f64 { 12 | let begin = get_rdtsc(); 13 | thread::sleep(time::Duration::from_secs(1)); 14 | let end = get_rdtsc(); 15 | info!("One sec is equal to {} cycles.", end-begin); 16 | (end - begin) as f64 17 | } 18 | 19 | #[inline] 20 | pub fn convert_rdtsc_to_ns(num: u64) -> f64 { 21 | let one_sec = get_one_sec_rdtsc(); 22 | let sec: f64 = num as f64 / one_sec; 23 | sec * ((1000 * 1000 * 1000) as f64) 24 | } -------------------------------------------------------------------------------- /benchs/doca_dma/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "doca_dma" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | doca = { path = "../../deps/rust-doca/doca" } 8 | bench_util = { path = "../bench_util" } 9 | clap = { version = "4.1.1", features = ["derive"] } 10 | netbencher_core = { path = "../../netbencher_core" } 11 | rand = "0.8.5" 12 | rand_chacha = "*" 13 | log = { version = "*"} 14 | simplelog = "^0.12.0" 15 | tokio = { version = "1.20.1", features = ["full"]} 16 | ctrlc = "3.2.3" 17 | nix = "0.25.0" 18 | serde = "1.0.144" 19 | serde_derive = "1.0.144" 20 | serde_json = "1.0.85" 21 | 22 | [features] 23 | OFED_5_4 = ["bench_util/OFED_5_4"] 24 | ARM = ["bench_util/ARM"] -------------------------------------------------------------------------------- /benchs/doca_dma/src/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(get_mut_unchecked)] 2 | 3 | mod bootstrap; 4 | use bootstrap::*; 5 | 6 | use 
bench_util::doca::args::CmdlineArgs; 7 | use clap::Parser; 8 | 9 | use log::*; 10 | use simplelog::*; 11 | 12 | fn main() { 13 | TermLogger::init( 14 | LevelFilter::Info, 15 | Config::default(), 16 | TerminalMode::Stdout, 17 | ColorChoice::Auto, 18 | ).unwrap(); 19 | let mut args = CmdlineArgs::parse(); 20 | args.coordinate(); 21 | // main_inner will create threads and wait for them to exit 22 | main_inner(args); 23 | } 24 | 25 | fn main_inner(args: CmdlineArgs) { 26 | if args.server { 27 | bootstrap_server(args); 28 | } else { 29 | bootstrap_client(args); 30 | } 31 | } -------------------------------------------------------------------------------- /benchs/docs/README.md: -------------------------------------------------------------------------------- 1 | # Latency and throughput results of SmartNIC 2 | 3 | ## Crate descriptions 4 | 5 | |Application|Description| 6 | |-----|-------------------| 7 | |`one_sided_rdma`|The crate for one-sided RDMA microbenchmarks.| 8 | |`two_sided_rdma`|The crate for two-sided UD RDMA microbenchmarks.| 9 | |`doca_dma` |The crate for doca DMA microbenchmarks.| 10 | 11 | |Library|Description| 12 | |-----|-------------------| 13 | |`bench_util`|A library crate containing common functions (e.g. 
doorbell batching, rdtsc-counter, command-line arguments) for the bench.| 14 | 15 | ## Run Evaluations 16 | 17 | For how to run each of the benchmark, please refer to the following docs: 18 | 19 | - [ONE SIDED RDMA](one_sided_rdma.md) 20 | - [TWO SIDED RDMA](two_sided_rdma.md) 21 | - [DOCA DMA](doca_dma.md) 22 | -------------------------------------------------------------------------------- /benchs/one_sided_rdma/src/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(get_mut_unchecked)] 2 | 3 | mod bootstrap; 4 | use bootstrap::*; 5 | 6 | use bench_util::args::*; 7 | use clap::Parser; 8 | 9 | use log::*; 10 | use simplelog::*; 11 | 12 | fn main() { 13 | TermLogger::init( 14 | LevelFilter::Info, 15 | Config::default(), 16 | TerminalMode::Stdout, 17 | ColorChoice::Auto, 18 | ).unwrap(); 19 | let mut args = CmdlineArgs::parse(); 20 | args.coordinate(); 21 | // main_inner will create threads and wait for them to exit 22 | main_inner(args); 23 | } 24 | 25 | fn main_inner(args: CmdlineArgs) { 26 | 27 | debug!( 28 | "Sanity check parameters: payload {}, nthreads {}, use READ {}, signaled: {}, doorbell: {}", 29 | args.payload, 30 | args.threads, 31 | args.read, 32 | args.signaled, 33 | args.doorbell, 34 | ); 35 | 36 | if args.server { 37 | bootstrap_server(args); 38 | } else { 39 | bootstrap_client(args); 40 | } 41 | } -------------------------------------------------------------------------------- /benchs/two_sided_rdma/src/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(get_mut_unchecked)] 2 | 3 | mod bootstrap; 4 | use bootstrap::*; 5 | 6 | use bench_util::args::*; 7 | use clap::Parser; 8 | 9 | use log::*; 10 | use simplelog::*; 11 | 12 | fn main() { 13 | TermLogger::init( 14 | LevelFilter::Info, 15 | Config::default(), 16 | TerminalMode::Stdout, 17 | ColorChoice::Auto, 18 | ).unwrap(); 19 | let mut args = CmdlineArgs::parse(); 20 | args.coordinate(); 21 | // 
main_inner will create threads and wait for them to exit 22 | main_inner(args); 23 | } 24 | 25 | fn main_inner(args: CmdlineArgs) { 26 | 27 | debug!( 28 | "Sanity check parameters: payload {}, nthreads {}, use READ {}, signaled: {}, doorbell: {}", 29 | args.payload, 30 | args.threads, 31 | args.read, 32 | args.signaled, 33 | args.doorbell, 34 | ); 35 | 36 | if args.server { 37 | bootstrap_server(args); 38 | } else { 39 | bootstrap_client(args); 40 | } 41 | } -------------------------------------------------------------------------------- /netbencher_core/examples/report_worker_stats.rs: -------------------------------------------------------------------------------- 1 | #![feature(get_mut_unchecked)] 2 | 3 | extern crate netbencher_core; 4 | 5 | use std::sync::Arc; 6 | 7 | use netbencher_core::{BenchRunner, SimpleBenchReporter}; 8 | 9 | fn main() { 10 | let mut runner = BenchRunner::new(2); 11 | runner.run( 12 | // The evaluated function will increase the statics per second 13 | |worker_id, runner, mut stats, _| { 14 | println!("Worker {} started", worker_id); 15 | while runner.running() { 16 | std::thread::sleep(std::time::Duration::from_secs(1)); 17 | unsafe { Arc::get_mut_unchecked(&mut stats).finished_one_op() }; 18 | } 19 | }, 20 | (), 21 | ); 22 | 23 | let mut reporter = SimpleBenchReporter::new(); 24 | for _ in 0..10 { 25 | std::thread::sleep(std::time::Duration::from_secs(1)); 26 | let stat = runner.report(&mut reporter); 27 | println!("Results: {}", stat); 28 | 29 | } 30 | 31 | runner.stop().unwrap(); 32 | } 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Smartbench 2 | 3 | Smart-bench is a rust-based benchmarking tool for [BlueField-series SmartNICs](https://www.nvidia.com/en-us/networking/products/data-processing-unit/). The purpose is to enable easy testing of BlueField-series SmartNICs. 
It is built on top of [DOCA](https://developer.nvidia.com/doca) and RDMA. The detailed results are summarized in our [paper](https://www.usenix.org/conference/osdi23/presentation/wei), please refer to here if you are interested. 4 | 5 | ## Evaluated benchmarks 6 | 7 | Smart-bench contains a set of benchmarks: 8 | 9 | - [one_sided_rdma](benchs/one_sided_rdma/) 10 | - [two_sided_rdma](benchs/two_sided_rdma/) 11 | - [doca_dma](benchs/doca_dma/) 12 | 13 | We pack our codes into a few building blocks: 14 | - [bench_util](benchs/bench_util/): a set of utilities for benchmarks. 15 | - [netbencher_core](netbencher_core/): a framework to start benchmark threads on different threads. 16 | 17 | We are continually maintaining the codebase to include features from future SmartNICs. 18 | 19 | 20 | ## Quick start 21 | 22 | Please refer to [README](benchs/docs/README.md). 23 | 24 | ## License Details 25 | 26 | MIT License -------------------------------------------------------------------------------- /benchs/bench_util/src/ud_message.rs: -------------------------------------------------------------------------------- 1 | use crate::{ GRH_SZ, CACHE_LINE_SZ }; 2 | use crate::round_up; 3 | 4 | static MSG_SCALE: f64 = 3.3; 5 | 6 | /// Align every UD message to cache 7 | /// First align with cacheline, then minus GRH_SZ 8 | /// to make sure no partial write will be generated at remote side. 
9 | #[inline] 10 | pub fn align_to_cacheline(payload: u64) -> u64 { 11 | let mut result = payload; 12 | if payload <= CACHE_LINE_SZ { 13 | result = CACHE_LINE_SZ - GRH_SZ; 14 | } else { 15 | let payload = round_up(payload, CACHE_LINE_SZ.try_into().unwrap()); 16 | result = payload - GRH_SZ; 17 | } 18 | result 19 | } 20 | 21 | pub struct UdBuffer { 22 | pub capacity: u64, 23 | pub cur_idx: u64, 24 | pub msg_size: u64, 25 | } 26 | 27 | impl UdBuffer { 28 | pub fn new(capacity: u64, msg_size: u64) -> Self { 29 | Self { 30 | capacity: capacity, 31 | cur_idx: 0, 32 | msg_size: msg_size, 33 | } 34 | } 35 | 36 | #[inline] 37 | pub fn get_region_size(&self) -> u64 { 38 | round_up(self.capacity * self.msg_size * 4, 64) 39 | } 40 | 41 | #[inline] 42 | pub fn get_start_addr(&mut self) -> u64 { 43 | let start = round_up(((self.cur_idx * self.msg_size) as f64 * MSG_SCALE) as u64, 64); 44 | self.cur_idx = (self.cur_idx + 1) % self.capacity; 45 | start 46 | } 47 | } -------------------------------------------------------------------------------- /benchs/doca_dma/src/bootstrap/connection.rs: -------------------------------------------------------------------------------- 1 | use doca::{ RawPointer, RawPointerMsg, DOCAMmap }; 2 | use doca::open_device_with_pci; 3 | use serde_derive::{ Serialize, Deserialize }; 4 | 5 | pub const DOCA_MAX_CONN_LENGTH: usize = 4096; 6 | 7 | #[derive(Serialize, Deserialize)] 8 | pub struct DocaConnInfoMsg { 9 | pub exports: Vec>, 10 | pub buffers: Vec, 11 | } 12 | 13 | #[derive(Clone)] 14 | pub struct DocaConnInfo { 15 | pub exports: Vec>, 16 | pub buffers: Vec, 17 | } 18 | 19 | impl Default for DocaConnInfo { 20 | fn default() -> Self { 21 | Self { 22 | exports: Vec::new(), 23 | buffers: Vec::new(), 24 | } 25 | } 26 | } 27 | 28 | impl From for DocaConnInfoMsg { 29 | fn from(info: DocaConnInfo) -> Self { 30 | Self { 31 | exports: info.exports, 32 | buffers: info.buffers.into_iter().map(|v| v.into()).collect(), 33 | } 34 | } 35 | } 36 | 37 | impl From 
for DocaConnInfo { 38 | fn from(msg: DocaConnInfoMsg) -> Self { 39 | Self { 40 | exports: msg.exports, 41 | buffers: msg.buffers.into_iter().map(|v| v.into()).collect(), 42 | } 43 | } 44 | } 45 | 46 | impl DocaConnInfo { 47 | pub fn serialize(data: DocaConnInfo) -> Vec { 48 | let msg: DocaConnInfoMsg = data.into(); 49 | serde_json::to_vec(&msg).unwrap() 50 | } 51 | 52 | pub fn deserialize(data: &[u8]) -> DocaConnInfo { 53 | let msg: DocaConnInfoMsg = serde_json::from_slice(data).unwrap(); 54 | let data: DocaConnInfo = msg.into(); 55 | data 56 | } 57 | } -------------------------------------------------------------------------------- /netbencher_core/examples/coordinator_report_master.rs: -------------------------------------------------------------------------------- 1 | extern crate netbencher_core; 2 | 3 | use std::time::Duration; 4 | 5 | use clap::{Command, Arg, arg}; 6 | use simplelog::*; 7 | use log::{info, LevelFilter}; 8 | 9 | use tokio::runtime::Runtime; 10 | 11 | use netbencher_core::CoordinatedReporterMaster; 12 | 13 | fn main() { 14 | TermLogger::init( 15 | LevelFilter::Info, 16 | Config::default(), 17 | TerminalMode::Stdout, 18 | ColorChoice::Auto, 19 | ).unwrap(); 20 | 21 | let matches = Command::new("bench example master") 22 | .arg( 23 | Arg::new("num_reports") 24 | .short('n') 25 | .long("num_reporters") 26 | .default_value("1"), 27 | ) 28 | .arg( 29 | Arg::new("listen_addr") 30 | .short('r') 31 | .long("listen_addr") 32 | .required(true), 33 | ) 34 | .arg( 35 | Arg::new("duration_secs") 36 | .short('d') 37 | .long("duration_secs") 38 | .default_value("20"), 39 | ) 40 | .get_matches(); 41 | 42 | Runtime::new().unwrap().block_on(async { 43 | let mut master = CoordinatedReporterMaster::new( 44 | *matches.get_one("num_reports") 45 | .expect("failed to get num reports"), 46 | matches.get_one::("listen_addr").unwrap().to_string() 47 | .parse().unwrap(), 48 | ) 49 | .await 50 | .expect("failed to create the master"); 51 | 52 | master 53 | 
.report_event_loop( 54 | Duration::from_secs(*matches.get_one("duration_secs").unwrap()), 55 | Duration::from_secs(1), 56 | ) 57 | .await 58 | .expect("Event loop report error"); 59 | }); 60 | 61 | info!("Master done"); 62 | } 63 | -------------------------------------------------------------------------------- /benchs/doca_dma/src/bootstrap/mod.rs: -------------------------------------------------------------------------------- 1 | mod client_construct; 2 | pub use client_construct::{ perform_client_routine, recv_doca_config }; 3 | 4 | mod server_construct; 5 | pub use server_construct::perform_server_routine; 6 | 7 | mod connection; 8 | pub use connection::{ DocaConnInfo, DocaConnInfoMsg }; 9 | pub use connection::DOCA_MAX_CONN_LENGTH; 10 | 11 | use std::{ thread, time }; 12 | use std::time::Duration; 13 | use tokio::runtime::Runtime; 14 | 15 | use bench_util::doca::args::*; 16 | use bench_util::{ MAX_CLIENTS, MIN_SERVER_LIFE }; 17 | 18 | use netbencher_core::{ 19 | CoordinatedReporterMaster, 20 | BenchRunner, 21 | SimpleBenchReporter, 22 | CoordinatedReporter, 23 | }; 24 | 25 | use log::*; 26 | 27 | pub fn bootstrap_client(mut args: CmdlineArgs) { 28 | /* load config using TCP channel */ 29 | let doca_conn_msg = Runtime::new().unwrap().block_on(recv_doca_config(args.listen_addr.parse().unwrap())); 30 | let mut runner = BenchRunner::new(args.threads as usize); 31 | // let mut runner = BenchRunner::new(args.threads.try_into().unwrap()); 32 | runner.run(move |thread_id, runner, stat, args| { 33 | perform_client_routine(thread_id, runner, stat, doca_conn_msg.clone(), args); 34 | }, args.clone()); 35 | 36 | let mut inner_reporter = SimpleBenchReporter::new_with_id(args.client_id.try_into().unwrap()); 37 | 38 | for epoch in 0..args.life { 39 | thread::sleep(time::Duration::from_secs(1)); 40 | info!("{}", runner.report(&mut inner_reporter)); 41 | } 42 | runner.stop().unwrap(); 43 | } 44 | 45 | pub fn bootstrap_server(mut args: CmdlineArgs) { 46 | if args.life < 
MIN_SERVER_LIFE { 47 | args.life = MIN_SERVER_LIFE; 48 | } 49 | 50 | let mut runner = BenchRunner::new(1); 51 | runner.run(|thread_id, runner, stat, args| { 52 | perform_server_routine(runner, args); 53 | }, args.clone()); 54 | thread::sleep(Duration::from_secs(args.life.into())); 55 | runner.stop().unwrap(); 56 | } -------------------------------------------------------------------------------- /benchs/one_sided_rdma/src/bootstrap/server_construct.rs: -------------------------------------------------------------------------------- 1 | use std::{ time, thread }; 2 | use std::net::{ SocketAddr }; 3 | use std::sync::{ Arc }; 4 | use std::sync::atomic::{ compiler_fence, Ordering }; 5 | 6 | use bench_util::args::CmdlineArgs; 7 | 8 | use KRdmaKit::{ UDriver, MemoryRegion }; 9 | use KRdmaKit::services_user::{ ConnectionManagerServer, DefaultConnectionManagerHandler }; 10 | 11 | use netbencher_core::BenchRunner; 12 | 13 | use log::*; 14 | 15 | pub fn perform_server_routine(runner: Arc>, args: CmdlineArgs) 16 | where T: Send + 'static + Sync + Copy 17 | { 18 | debug!("server uses RNIC {}", args.nic_idx); 19 | 20 | // bootstrap one-sided RDMA server 21 | let ctx = UDriver::create() 22 | .expect("failed to query device") 23 | .devices() 24 | .get(args.nic_idx) 25 | .expect("no rdma device available") 26 | .open_context() 27 | .expect("failed to create RDMA context"); 28 | 29 | info!("Check registered huge page sz: {}KB", args.random_space / 1024); 30 | 31 | let mut handler = DefaultConnectionManagerHandler::new(&ctx, 1); 32 | let server_mr = if args.huge_page { 33 | MemoryRegion::new_huge_page(ctx.clone(), args.random_space as usize).expect( 34 | "Failed to allocate huge page MR" 35 | ) 36 | } else { 37 | MemoryRegion::new(ctx.clone(), args.random_space as usize).expect("Failed to allocate MR") 38 | }; 39 | 40 | handler.register_mr(vec![("MR".to_string(), server_mr)]); 41 | let server = ConnectionManagerServer::new(handler); 42 | let listen_addr: SocketAddr = 
args.listen_addr.parse().unwrap(); 43 | 44 | /* set listener, the server_thread listens for connection requests */ 45 | let server_thread = server.spawn_listener(listen_addr); 46 | 47 | while runner.running() { 48 | compiler_fence(Ordering::SeqCst); 49 | } 50 | server.stop_listening(); 51 | // wait for listeners to exit 52 | let _ = server_thread.join(); 53 | info!("Exit"); 54 | } -------------------------------------------------------------------------------- /netbencher_core/src/reporter/simple_reporter.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | 3 | use super::{BenchReporter, BenchStat, CollectedBenchStat}; 4 | 5 | /// A simple reporter that reports the throughput and latency of workers from this machine. 6 | #[derive(Debug, PartialEq, Clone, Copy)] 7 | pub struct SimpleBenchReporter { 8 | stats_of_last_period: BenchStat, 9 | last_record_time: Instant, 10 | id: usize, 11 | } 12 | 13 | impl Default for SimpleBenchReporter { 14 | fn default() -> Self { 15 | Self { 16 | stats_of_last_period: BenchStat::default(), 17 | last_record_time: Instant::now(), 18 | id: 0, 19 | } 20 | } 21 | } 22 | 23 | impl SimpleBenchReporter { 24 | /// Create a new simple reporter 25 | pub fn new() -> Self { 26 | Self::default() 27 | } 28 | 29 | /// Create a new simple reporter with a given id 30 | pub fn new_with_id(id: usize) -> Self { 31 | Self { 32 | stats_of_last_period: BenchStat::default(), 33 | last_record_time: Instant::now(), 34 | id, 35 | } 36 | } 37 | } 38 | 39 | impl BenchReporter for SimpleBenchReporter { 40 | fn report_collected_stat( 41 | &mut self, 42 | stats: &Vec>, 43 | ) -> CollectedBenchStat { 44 | let mut new_stat = BenchStat::default(); 45 | for stat in stats { 46 | new_stat.num_ops_finished += stat.num_ops_finished; 47 | } 48 | 49 | let now = Instant::now(); 50 | let gap = new_stat - self.stats_of_last_period; 51 | 52 | // microseconds passed 53 | let duration = 
now.duration_since(self.last_record_time).as_micros() as f64; 54 | // mops 55 | let throughput = gap.num_ops_finished as f64 / duration; 56 | // microseconds 57 | let avg_latency = duration / gap.num_ops_finished as f64; 58 | 59 | self.stats_of_last_period = new_stat; 60 | self.last_record_time = now; 61 | 62 | CollectedBenchStat { 63 | id: self.id, 64 | throughput, 65 | avg_latency, 66 | p99_latency: 0.0, 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /netbencher_core/README.md: -------------------------------------------------------------------------------- 1 | # NetBench-Core 2 | 3 | A simple framework to start threads and collect their benchmark statistical data. 4 | 5 | ## Quick start 6 | 7 | To check the framework works, simply use the following and ideally there would be no errors: 8 | 9 | ``` 10 | cargo test 11 | ``` 12 | 13 | If the crate works fine, then you can use the following way to start collecting results from threads, 14 | where each thread executes one null op per second: 15 | 16 | ```rust 17 | let mut runner = BenchRunner::new(2); 18 | runner.run( 19 | // The evaluated function will increase the statics per second 20 | |worker_id, runner, mut stats, _| { 21 | println!("Worker {} started", worker_id); 22 | while runner.running() { 23 | std::thread::sleep(std::time::Duration::from_secs(1)); 24 | unsafe { Arc::get_mut_unchecked(&mut stats).finished_one_op() }; 25 | } 26 | }, 27 | (), 28 | ); 29 | 30 | let mut reporter = SimpleBenchReporter::new(); 31 | for _ in 0..10 { 32 | std::thread::sleep(std::time::Duration::from_secs(1)); 33 | let stat = runner.report(&mut reporter); 34 | println!("Results: {}", stat); 35 | 36 | } 37 | ``` 38 | 39 | Running such piece of code would generate the following results: 40 | 41 | ``` 42 | Worker 1 started 43 | Worker 0 started 44 | Results: Throughput@0: 0.99 ops/s, Avg Latency: 0.00 ms, 99th Latency: 0.00 ms 45 | Results: Throughput@0: 1.99 ops/s, Avg 
Latency: 0.00 ms, 99th Latency: 0.00 ms 46 | Results: Throughput@0: 1.99 ops/s, Avg Latency: 0.00 ms, 99th Latency: 0.00 ms 47 | Results: Throughput@0: 2.99 ops/s, Avg Latency: 0.00 ms, 99th Latency: 0.00 ms 48 | ``` 49 | 50 | --- 51 | 52 | For more example, please check the code snippets in the [examples](./examples/) folder. 53 | e.g., `cargo run --example report_worker_stats`. 54 | 55 | For the coordinated reporters example, please note to first start the master then the reporter: 56 | 57 | ``` 58 | # On the master machine: 59 | cargo run --example coordinator_report_master --listen_addr="127.0.0.1:8888" 60 | 61 | # On the reporter machine(s): 62 | cargo run --example coordinator_report_worker --reporter_addr="127.0.0.1:8888" 63 | ``` 64 | 65 | Feel free to change the listen_addr or reporter_addr, as long as they are the same. -------------------------------------------------------------------------------- /benchs/bench_util/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Mod args 2 | //! CmdlineArgs: parse command line arguments for bench 3 | //! 4 | //! Mod doorbell 5 | //! Support RDMA post_send/post_recv doorbell. 6 | //! 1. RcDoorbellHelper: DoorbellHelper for RC READ/WRITE 7 | //! 2. UdDoorbellHelper: DoorbellHelper for UD SEND 8 | //! 3. RecvDoorbellHelper: DoorbellHelper for RECV 9 | //! Mod ud_endpoint 10 | //! pub fn bootstrap_uds( 11 | //! socket: &mut TcpStream, 12 | //! nic_idx: usize, 13 | //! nic_num: usize, 14 | //! threads: usize, 15 | //! client_id: u64, 16 | //! ) -> (Vec>, Vec>) 17 | //! bootstrap UD connections at client-side 18 | //! 19 | //! pub fn bootstrap_ud_server( 20 | //! threads: usize, 21 | //! nic_idx: usize, 22 | //! nic_num: usize, 23 | //! ) -> (Vec>, Vec) 24 | //! bootstrap UD server, after calling, server should be ready for client to send 25 | //! 26 | //! Mod rdtsc 27 | //! An x86-specific timer lib, should be banned with --features "ARM" in a ARM environment. 28 | //! 
Mod doca 29 | //! CmdlineArgs: parse command line arguments for doca_related bench 30 | 31 | #![feature(trusted_random_access)] 32 | 33 | pub mod args; 34 | pub mod doorbell; 35 | pub mod ud_endpoint; 36 | pub mod ud_manager; 37 | pub mod ud_message; 38 | 39 | #[cfg(not(feature = "ARM"))] 40 | pub mod rdtsc; 41 | 42 | pub mod doca; 43 | 44 | pub const MIN_SERVER_LIFE: u32 = 30; 45 | pub const MAX_CLIENTS: usize = 24; 46 | 47 | /// maxium size of recv-batch posted 48 | pub const MAX_RECV_NUM: usize = 64; 49 | /// global route header sz for ud send 50 | pub const GRH_SZ: u64 = 40; 51 | /// maxium inline sz for a ud send 52 | pub const MAX_INLINE_SZ: usize = 64; 53 | /// maxium pending messages 54 | pub const MAX_FLYING_MSG: u64 = 256; 55 | /// maxium payload for a ud send/recv 56 | pub const MAX_MSG_SZ: u64 = 4096; 57 | /// cacheline sz 58 | pub const CACHE_LINE_SZ: u64 = 64; 59 | 60 | #[inline] 61 | pub fn round_up(num: u64, factor: i64) -> u64 62 | { 63 | if factor == 0 64 | { 65 | return num; 66 | } 67 | 68 | ((num + factor as u64 - 1) as i64 & (-factor)) as u64 69 | } 70 | 71 | #[cfg(test)] 72 | mod tests { 73 | #[test] 74 | fn test_ud_align_to_cacheline() { 75 | use crate::ud_message::align_to_cacheline; 76 | let mut payload = 16; 77 | payload = align_to_cacheline(payload); 78 | assert_eq!(payload, 24); 79 | 80 | payload = 1024; 81 | payload = align_to_cacheline(payload); 82 | assert_eq!(payload, 984); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /netbencher_core/examples/coordinator_report_worker.rs: -------------------------------------------------------------------------------- 1 | #![feature(get_mut_unchecked)] 2 | 3 | extern crate netbencher_core; 4 | 5 | use clap::{Command, Arg, arg}; 6 | use simplelog::*; 7 | use log::{info, warn, LevelFilter}; 8 | use std::sync::Arc; 9 | use tokio::runtime::Runtime; 10 | 11 | use netbencher_core::{BenchRunner, CoordinatedReporter, SimpleBenchReporter}; 12 | 13 | fn 
main() { 14 | TermLogger::init( 15 | LevelFilter::Info, 16 | Config::default(), 17 | TerminalMode::Stdout, 18 | ColorChoice::Auto, 19 | ).unwrap(); 20 | warn!("The master must alive before start the client"); 21 | 22 | let matches = Command::new("bench example") 23 | .arg( 24 | Arg::new("num_workers") 25 | .short('n') 26 | .long("num_workers") 27 | .default_value("1"), 28 | ) 29 | .arg( 30 | Arg::new("reporter_addr") 31 | .short('r') 32 | .long("reporter_addr") 33 | .required(true), 34 | ) 35 | .arg( 36 | Arg::new("id") 37 | .short('i') 38 | .long("id") 39 | .default_value("0"), 40 | ) 41 | .get_matches(); 42 | 43 | let mut runner = BenchRunner::new( 44 | *matches.get_one("num_workers") 45 | .expect("failed to get num workers"), 46 | ); 47 | runner.run( 48 | // The evaluated function will increase the statics per second 49 | |worker_id, runner, mut stats, _| { 50 | info!("Worker {} started", worker_id); 51 | while runner.running() { 52 | std::thread::sleep(std::time::Duration::from_secs(1)); 53 | unsafe { Arc::get_mut_unchecked(&mut stats).finished_one_op() }; 54 | } 55 | }, 56 | (), 57 | ); 58 | 59 | let rt = Runtime::new().unwrap(); 60 | 61 | rt.block_on(async { 62 | let inner_reporter = 63 | SimpleBenchReporter::new_with_id(*matches.get_one("id").unwrap()); 64 | let mut reporter = CoordinatedReporter::new( 65 | matches.get_one::("reporter_addr") 66 | .expect("failed to get the reporter_addr") 67 | .to_string().parse().unwrap(), 68 | inner_reporter, 69 | ) 70 | .await 71 | .expect("failed to create the reporter"); 72 | 73 | for _ in 0..10 { 74 | std::thread::sleep(std::time::Duration::from_secs(1)); 75 | let stat = runner.report_async(&mut reporter).await; 76 | // println!("Results: {}", stat); 77 | } 78 | }); 79 | 80 | runner.stop().unwrap(); 81 | 82 | info!("done"); 83 | } 84 | -------------------------------------------------------------------------------- /benchs/rdma_util/src/rc_doorbell.rs: 
-------------------------------------------------------------------------------- 1 | use KRdmaKit::{MemoryRegion, DatapathError}; 2 | use KRdmaKit::queue_pairs::QueuePair; 3 | use KRdmaKit::rdma_shim::bindings::*; 4 | use bench_util::doorbell::*; 5 | 6 | use core::iter::TrustedRandomAccessNoCoerce; 7 | use core::ops::Range; 8 | 9 | use std::sync::Arc; 10 | 11 | pub struct RcDoorbellHelper { 12 | send_doorbell: DoorbellHelper, 13 | send_qp: Arc, 14 | } 15 | 16 | impl RcDoorbellHelper { 17 | pub fn create(capacity: usize, qp: Arc) -> Self { 18 | Self { 19 | send_doorbell: DoorbellHelper::new(capacity), 20 | send_qp: qp, 21 | } 22 | } 23 | 24 | ///Init RcDoorbellHelper's internal doorbell with specific IBV_WR_OPCODE 25 | /// Since one-sided test may read or write, 26 | /// we leave the init(op) to be called by user to delay initialization. 27 | #[inline] 28 | pub fn init(&mut self, op: u32) { 29 | self.send_doorbell.init(op); 30 | } 31 | 32 | ///Post WR to `send_doorbell`'s next entry 33 | /// If `send_doorbell` is full, 34 | /// this func will call flush_doorbell() to send all batched WRs. 
35 | pub fn post_send( 36 | &mut self, 37 | mr: &MemoryRegion, 38 | range: Range, 39 | signaled: bool, 40 | raddr: u64, 41 | rkey: u32, 42 | wr_id: u64 43 | ) -> Result<(), DatapathError> { 44 | self.send_doorbell.next(); 45 | /* set sge for current wr */ 46 | self.send_doorbell.cur_sge().addr = unsafe { mr.get_rdma_addr() + range.start }; 47 | self.send_doorbell.cur_sge().length = range.size() as u32; 48 | self.send_doorbell.cur_sge().lkey = mr.lkey().0; 49 | 50 | /* set wr fields */ 51 | let send_flag: i32 = if signaled { ibv_send_flags::IBV_SEND_SIGNALED as i32 } else { 0 }; 52 | self.send_doorbell.cur_wr().wr_id = wr_id; 53 | 54 | #[cfg(feature = "OFED_5_4")] 55 | { 56 | self.send_doorbell.cur_wr().send_flags = send_flag as u32; 57 | } 58 | 59 | #[cfg(not(feature = "OFED_5_4"))] 60 | { 61 | self.send_doorbell.cur_wr().send_flags = send_flag; 62 | } 63 | unsafe { 64 | self.send_doorbell.cur_wr().wr.rdma.as_mut().remote_addr = raddr; 65 | self.send_doorbell.cur_wr().wr.rdma.as_mut().rkey = rkey; 66 | } 67 | // no need to set imm_data for read/write 68 | 69 | let mut res = Ok(()); 70 | if self.send_doorbell.is_full() { 71 | // flush a doorbell 72 | self.send_doorbell.freeze(); 73 | res = self.flush_doorbell(); 74 | self.send_doorbell.clear(); 75 | } 76 | res 77 | } 78 | 79 | #[inline] 80 | pub fn flush_doorbell(&mut self) -> Result<(), DatapathError> { 81 | self.send_qp.post_send_wr(self.send_doorbell.first_wr_ptr()) 82 | } 83 | } -------------------------------------------------------------------------------- /benchs/bench_util/src/doorbell/rc_doorbell.rs: -------------------------------------------------------------------------------- 1 | use KRdmaKit::{MemoryRegion, DatapathError}; 2 | use KRdmaKit::queue_pairs::QueuePair; 3 | use KRdmaKit::rdma_shim::bindings::*; 4 | 5 | use crate::doorbell::DoorbellHelper; 6 | 7 | use core::iter::TrustedRandomAccessNoCoerce; 8 | use core::ops::Range; 9 | 10 | use std::sync::Arc; 11 | 12 | pub struct RcDoorbellHelper { 13 | 
send_doorbell: DoorbellHelper, 14 | send_qp: Arc, 15 | } 16 | 17 | impl RcDoorbellHelper { 18 | pub fn create(capacity: usize, qp: Arc) -> Self { 19 | Self { 20 | send_doorbell: DoorbellHelper::new(capacity), 21 | send_qp: qp, 22 | } 23 | } 24 | 25 | ///Init RcDoorbellHelper's internal doorbell with specific IBV_WR_OPCODE 26 | /// Since one-sided test may read or write, 27 | /// we leave the init(op) to be called by user to delay initialization. 28 | #[inline] 29 | pub fn init(&mut self, op: u32) { 30 | self.send_doorbell.init(op); 31 | } 32 | 33 | ///Post WR to `send_doorbell`'s next entry 34 | /// If `send_doorbell` is full, 35 | /// this func will call flush_doorbell() to send all batched WRs. 36 | pub fn post_send( 37 | &mut self, 38 | mr: &MemoryRegion, 39 | range: Range, 40 | signaled: bool, 41 | raddr: u64, 42 | rkey: u32, 43 | wr_id: u64 44 | ) -> Result<(), DatapathError> { 45 | self.send_doorbell.next(); 46 | /* set sge for current wr */ 47 | self.send_doorbell.cur_sge().addr = unsafe { mr.get_rdma_addr() + range.start }; 48 | self.send_doorbell.cur_sge().length = range.size() as u32; 49 | self.send_doorbell.cur_sge().lkey = mr.lkey().0; 50 | 51 | /* set wr fields */ 52 | let send_flag: i32 = if signaled { ibv_send_flags::IBV_SEND_SIGNALED as i32 } else { 0 }; 53 | self.send_doorbell.cur_wr().wr_id = wr_id; 54 | 55 | #[cfg(feature = "OFED_5_4")] 56 | { 57 | self.send_doorbell.cur_wr().send_flags = send_flag as u32; 58 | } 59 | 60 | #[cfg(not(feature = "OFED_5_4"))] 61 | { 62 | self.send_doorbell.cur_wr().send_flags = send_flag; 63 | } 64 | unsafe { 65 | self.send_doorbell.cur_wr().wr.rdma.as_mut().remote_addr = raddr; 66 | self.send_doorbell.cur_wr().wr.rdma.as_mut().rkey = rkey; 67 | } 68 | // no need to set imm_data for read/write 69 | 70 | let mut res = Ok(()); 71 | if self.send_doorbell.is_full() { 72 | // flush a doorbell 73 | self.send_doorbell.freeze(); 74 | res = self.flush_doorbell(); 75 | self.send_doorbell.clear(); 76 | } 77 | res 78 | } 79 | 
80 | #[inline] 81 | pub fn flush_doorbell(&mut self) -> Result<(), DatapathError> { 82 | self.send_qp.post_send_wr(self.send_doorbell.first_wr_ptr()) 83 | } 84 | } -------------------------------------------------------------------------------- /benchs/bench_util/src/ud_manager.rs: -------------------------------------------------------------------------------- 1 | use tokio::time::timeout; 2 | use tokio::net::{ TcpListener }; 3 | use tokio::io::{ AsyncReadExt, AsyncWriteExt }; 4 | 5 | use std::time::Duration; 6 | use std::thread::JoinHandle; 7 | use std::{ thread, io }; 8 | use std::sync::{ Arc, RwLock }; 9 | use std::net::{ SocketAddr }; 10 | 11 | use std::collections::HashMap; 12 | 13 | use crate::ud_endpoint::*; 14 | use crate::MAX_MSG_SZ; 15 | use log::*; 16 | 17 | pub struct UdManager { 18 | pub listen_addr: SocketAddr, 19 | pub conn_meta: Arc>>>, 20 | metas_msg: Vec, 21 | running: *mut bool, 22 | } 23 | 24 | impl UdManager { 25 | pub fn new( 26 | listen_addr: SocketAddr, 27 | conn_meta: Arc>>>, 28 | metas_msg: Vec 29 | ) -> Arc { 30 | let running = Box::into_raw(Box::new(true)); 31 | Arc::new(Self { 32 | listen_addr: listen_addr, 33 | conn_meta: conn_meta, 34 | metas_msg: metas_msg, 35 | running: running, 36 | }) 37 | } 38 | } 39 | 40 | unsafe impl Send for UdManager {} 41 | unsafe impl Sync for UdManager {} 42 | 43 | impl UdManager { 44 | pub fn spawn_server_listener(self: &Arc) -> JoinHandle> { 45 | let running_addr: u64 = self.running as u64; 46 | let listener = self.clone(); 47 | thread::spawn(move || { 48 | tokio::runtime::Builder 49 | ::new_current_thread() 50 | .enable_all() 51 | .build() 52 | .unwrap() 53 | .block_on(listener.listen_inner(running_addr)) 54 | }) 55 | } 56 | 57 | async fn listen_inner (self: &Arc, running_ptr: u64) -> io::Result<()> { 58 | let mut meta_buff = [0; MAX_MSG_SZ as usize]; 59 | let listener = TcpListener::bind(self.listen_addr).await?; 60 | // background thread for handshake 61 | while unsafe { *(running_ptr as *mut bool) } 
{ 62 | if let Ok(res) = timeout(Duration::from_secs(1), listener.accept()).await { 63 | let (mut socket, _) = res?; 64 | let byte_recv = socket.read(&mut meta_buff).await?; 65 | // println!("Recv a {}-byte message.", byte_recv); 66 | match byte_recv { 67 | 0 => { 68 | // TCP connection is shutdown 69 | unsafe { 70 | *(running_ptr as *mut bool) = false; 71 | } 72 | break; 73 | } 74 | _ => { 75 | info!("Recv a {}-byte connection message.", byte_recv); 76 | } 77 | } 78 | let (client_meta, client_id) = unmarshal_batch(&meta_buff[0..byte_recv]); 79 | let old_v = self.conn_meta.write().unwrap().insert(client_id, client_meta); 80 | if old_v.is_some() { 81 | panic!("Wrong in your bootstraping or programming: duplicated connection, client_id: {}", client_id); 82 | } 83 | let byte_send = socket.write(self.metas_msg.as_slice()).await?; 84 | assert!(byte_send != 0); 85 | } 86 | } 87 | Ok(()) 88 | } 89 | 90 | pub fn stop_listen(self : &Arc) { 91 | let running_addr: u64 = self.running as u64; 92 | unsafe { *(running_addr as *mut bool) = false; } 93 | } 94 | } -------------------------------------------------------------------------------- /benchs/bench_util/src/doorbell/ud_doorbell.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use KRdmaKit::{ MemoryRegion, QueuePair, DatapathError, DatagramEndpoint }; 3 | use KRdmaKit::rdma_shim::bindings::*; 4 | use core::iter::TrustedRandomAccessNoCoerce; 5 | use core::ops::Range; 6 | 7 | use crate::doorbell::DoorbellHelper; 8 | use crate::MAX_INLINE_SZ; 9 | 10 | pub struct UdDoorbellHelper { 11 | send_doorbell: DoorbellHelper, 12 | send_qp: Arc, 13 | } 14 | 15 | impl UdDoorbellHelper { 16 | pub fn create(capacity: usize, op: u32, qp: Arc) -> Self { 17 | let mut ret = Self { 18 | send_doorbell: DoorbellHelper::new(capacity), 19 | send_qp: qp, 20 | }; 21 | ret.send_doorbell.init(op); 22 | ret 23 | } 24 | 25 | #[inline] 26 | pub fn sanity_check(&self) -> bool { 27 | let mut ret = 
true; 28 | for i in 0..self.send_doorbell.capacity { 29 | let sge_ptr = &self.send_doorbell.sges[i] as *const ibv_sge; 30 | let wr_sg_list = self.send_doorbell.wrs[i].sg_list; 31 | ret &= (sge_ptr as u64) == (wr_sg_list as u64); 32 | } 33 | ret 34 | } 35 | 36 | pub fn post_send( 37 | &mut self, 38 | endpoint: &DatagramEndpoint, 39 | mr: &MemoryRegion, 40 | range: Range, 41 | wr_id: u64, 42 | imm_data: Option, 43 | signaled: bool 44 | ) -> Result<(), DatapathError> { 45 | self.send_doorbell.next(); 46 | // setup sge fields 47 | self.send_doorbell.cur_sge().addr = unsafe { mr.get_rdma_addr() + range.start }; 48 | self.send_doorbell.cur_sge().length = range.size() as u32; 49 | self.send_doorbell.cur_sge().lkey = mr.lkey().0; 50 | // setup UD SEND wr fields 51 | unsafe { 52 | self.send_doorbell.cur_wr().wr.ud.as_mut().remote_qpn = endpoint.qpn(); 53 | self.send_doorbell.cur_wr().wr.ud.as_mut().remote_qkey = endpoint.qkey(); 54 | self.send_doorbell.cur_wr().wr.ud.as_mut().ah = endpoint 55 | .raw_address_handler_ptr() 56 | .as_ptr(); 57 | } 58 | self.send_doorbell.cur_wr().send_flags = match signaled { 59 | true => ibv_send_flags::IBV_SEND_SIGNALED.try_into().unwrap(), 60 | false => 0, 61 | }; 62 | self.send_doorbell.cur_wr().send_flags |= if range.size() <= MAX_INLINE_SZ { 63 | ibv_send_flags::IBV_SEND_INLINE.try_into().unwrap() 64 | } else { 65 | 0 66 | }; 67 | 68 | let imm = match imm_data { 69 | Some(i) => i, 70 | None => 0, 71 | }; 72 | 73 | #[cfg(feature = "OFED_5_4")] 74 | unsafe { 75 | *self.send_doorbell.cur_wr().__bindgen_anon_1.imm_data.as_mut() = imm; 76 | } 77 | #[cfg(not(feature = "OFED_5_4"))] 78 | { 79 | self.send_doorbell.cur_wr().imm_data = imm; 80 | } 81 | 82 | // info!("doorbell cur idx: {}, sansity: {}", self.send_doorbell.cur_idx, self.sanity_check()); 83 | 84 | let mut res = Ok(()); 85 | if self.send_doorbell.is_full() { 86 | // info!("doorbell is full"); 87 | self.send_doorbell.freeze(); 88 | res = self.flush(); 89 | self.send_doorbell.clear(); 90 
| } 91 | res 92 | } 93 | 94 | #[inline] 95 | pub fn flush(&mut self) -> Result<(), DatapathError> { 96 | self.send_qp.post_send_wr(self.send_doorbell.first_wr_ptr()) 97 | } 98 | } -------------------------------------------------------------------------------- /benchs/rdma_util/src/ud_doorbell.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use KRdmaKit::{ MemoryRegion, QueuePair, DatapathError, DatagramEndpoint }; 3 | use KRdmaKit::rdma_shim::bindings::*; 4 | use core::iter::TrustedRandomAccessNoCoerce; 5 | use core::ops::Range; 6 | use bench_util::doorbell::*; 7 | use log::*; 8 | 9 | pub const MAX_INLINE_SZ: usize = 64; 10 | 11 | pub struct UdDoorbellHelper { 12 | send_doorbell: DoorbellHelper, 13 | send_qp: Arc, 14 | } 15 | 16 | impl UdDoorbellHelper { 17 | pub fn create(capacity: usize, op: u32, qp: Arc) -> Self { 18 | let mut ret = Self { 19 | send_doorbell: DoorbellHelper::new(capacity), 20 | send_qp: qp, 21 | }; 22 | ret.send_doorbell.init(op); 23 | ret 24 | } 25 | 26 | #[inline] 27 | pub fn sanity_check(&self) -> bool { 28 | let mut ret = true; 29 | for i in 0..self.send_doorbell.capacity { 30 | let sge_ptr = &self.send_doorbell.sges[i] as *const ibv_sge; 31 | let wr_sg_list = self.send_doorbell.wrs[i].sg_list; 32 | ret &= (sge_ptr as u64) == (wr_sg_list as u64); 33 | } 34 | ret 35 | } 36 | 37 | pub fn post_send( 38 | &mut self, 39 | endpoint: &DatagramEndpoint, 40 | mr: &MemoryRegion, 41 | range: Range, 42 | wr_id: u64, 43 | imm_data: Option, 44 | signaled: bool 45 | ) -> Result<(), DatapathError> { 46 | self.send_doorbell.next(); 47 | // setup sge fields 48 | self.send_doorbell.cur_sge().addr = unsafe { mr.get_rdma_addr() + range.start }; 49 | self.send_doorbell.cur_sge().length = range.size() as u32; 50 | self.send_doorbell.cur_sge().lkey = mr.lkey().0; 51 | // setup UD SEND wr fields 52 | unsafe { 53 | self.send_doorbell.cur_wr().wr.ud.as_mut().remote_qpn = endpoint.qpn(); 54 | 
self.send_doorbell.cur_wr().wr.ud.as_mut().remote_qkey = endpoint.qkey(); 55 | self.send_doorbell.cur_wr().wr.ud.as_mut().ah = endpoint 56 | .raw_address_handler_ptr() 57 | .as_ptr(); 58 | } 59 | self.send_doorbell.cur_wr().send_flags = match signaled { 60 | true => ibv_send_flags::IBV_SEND_SIGNALED.try_into().unwrap(), 61 | false => 0, 62 | }; 63 | self.send_doorbell.cur_wr().send_flags |= if range.size() <= MAX_INLINE_SZ { 64 | ibv_send_flags::IBV_SEND_INLINE.try_into().unwrap() 65 | } else { 66 | 0 67 | }; 68 | 69 | let imm = match imm_data { 70 | Some(i) => i, 71 | None => 0, 72 | }; 73 | 74 | #[cfg(feature = "OFED_5_4")] 75 | unsafe { 76 | *self.send_doorbell.cur_wr().__bindgen_anon_1.imm_data.as_mut() = imm; 77 | } 78 | #[cfg(not(feature = "OFED_5_4"))] 79 | { 80 | self.send_doorbell.cur_wr().imm_data = imm; 81 | } 82 | 83 | // info!("doorbell cur idx: {}, sansity: {}", self.send_doorbell.cur_idx, self.sanity_check()); 84 | 85 | let mut res = Ok(()); 86 | if self.send_doorbell.is_full() { 87 | // info!("doorbell is full"); 88 | self.send_doorbell.freeze(); 89 | res = self.flush(); 90 | self.send_doorbell.clear(); 91 | } 92 | res 93 | } 94 | 95 | #[inline] 96 | pub fn flush(&mut self) -> Result<(), DatapathError> { 97 | self.send_qp.post_send_wr(self.send_doorbell.first_wr_ptr()) 98 | } 99 | } -------------------------------------------------------------------------------- /benchs/bench_util/src/doca/args.rs: -------------------------------------------------------------------------------- 1 | use std::sync::{ Arc }; 2 | 3 | use clap::{ Command, arg, Arg, ArgAction, Parser }; 4 | 5 | use rand::RngCore; 6 | use rand_chacha::ChaCha8Rng; 7 | 8 | use crate::round_up; 9 | use crate::CACHE_LINE_SZ; 10 | 11 | #[derive(Parser)] 12 | pub struct CmdlineArgs { 13 | /* Common fields of client and server */ 14 | 15 | /// The PCIe device list (we suggest using one) 16 | #[arg(short, long)] 17 | pub pci_dev: Vec, 18 | 19 | /// Memory region bytes of the server 20 | 
#[arg(long, default_value_t = 10 * 1024)] 21 | pub random_space: u64, 22 | 23 | /// The life of the bench (seconds) 24 | #[arg(long, default_value_t = 15)] 25 | pub life: u32, 26 | 27 | /// The listening address of server 28 | #[arg(long)] 29 | pub listen_addr: String, 30 | 31 | /// Whether to allcate memory regions using huge pages 32 | #[arg(long)] 33 | pub huge_page: bool, 34 | 35 | /* Client-specific fields */ 36 | /// Client id, which will be used to generate unique seed 37 | #[arg(short, long, default_value_t = 0)] 38 | pub client_id: u64, 39 | 40 | /// Number of threads used 41 | #[arg(short, long, default_value_t = 1)] 42 | pub threads: u64, 43 | 44 | /// Payload of each request 45 | #[arg(long, default_value_t = 32)] 46 | pub payload: u64, 47 | 48 | /// Client-local memory region bytes 49 | #[arg(long, default_value_t = 4096)] 50 | pub local_mr: u64, 51 | 52 | /// Whether to run READ bench 53 | #[arg(long)] 54 | pub read: bool, 55 | 56 | /// Whether to separate thread access area 57 | #[arg(long)] 58 | pub fixed: bool, 59 | 60 | /// The random access area bytes 61 | #[arg(long, default_value_t = 8192)] 62 | pub thread_gap: u64, 63 | 64 | /// Whether to run lantency test 65 | #[arg(long)] 66 | pub latency_test: bool, 67 | 68 | /// Number of requests in a batch 69 | #[arg(long, default_value_t = 64)] 70 | pub batch_size: usize, 71 | 72 | /* Server-specific fields */ 73 | /// Whether to run bench as the server 74 | #[arg(long)] 75 | pub server: bool, 76 | } 77 | 78 | impl Clone for CmdlineArgs { 79 | fn clone(&self) -> Self { 80 | Self { 81 | pci_dev: self.pci_dev.clone(), 82 | listen_addr: self.listen_addr.clone(), 83 | ..*self 84 | } 85 | } 86 | } 87 | 88 | impl CmdlineArgs { 89 | /// coordinate the arguments to make them consistent with each other 90 | pub fn coordinate(&mut self) { 91 | self.local_mr = std::cmp::max(self.batch_size as u64 * self.payload, self.local_mr); 92 | self.thread_gap = std::cmp::max(self.payload, self.thread_gap); 93 | 
self.random_space = std::cmp::max(self.payload, self.random_space); 94 | self.random_space = std::cmp::max(self.threads * self.thread_gap, self.random_space); 95 | } 96 | 97 | /// get next index to access in the random region 98 | pub fn get_next_index(&self, thread_idx: usize, rand: &mut ChaCha8Rng) -> u64 { 99 | let mut r = rand.next_u64(); 100 | 101 | if self.payload == self.random_space { 102 | return 0; 103 | } 104 | 105 | if self.fixed { 106 | if self.thread_gap != 0 { 107 | // r = (thread_idx * 64) as _; 108 | assert!(self.thread_gap >= self.payload); 109 | assert!(self.threads * self.thread_gap <= self.random_space); 110 | r = (r % self.thread_gap) + (thread_idx as u64) * self.thread_gap; 111 | } else { 112 | r = 0; 113 | } 114 | } 115 | 116 | // align 117 | r = round_up(r, CACHE_LINE_SZ as i64); 118 | assert_eq!(r % CACHE_LINE_SZ, 0); 119 | 120 | let index = (r % (self.random_space - self.payload)) as u64; 121 | index 122 | } 123 | } -------------------------------------------------------------------------------- /benchs/doca_dma/src/bootstrap/server_construct.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{ compiler_fence, Ordering }; 2 | use std::slice; 3 | use std::net::{ SocketAddr, TcpStream }; 4 | use std::io::Write; 5 | use std::time::Duration; 6 | use std::ptr::{ NonNull, null_mut }; 7 | use std::sync::Arc; 8 | 9 | use bench_util::doca::args::CmdlineArgs; 10 | use crate::bootstrap::*; 11 | use bench_util::round_up; 12 | 13 | use netbencher_core::*; 14 | use log::info; 15 | 16 | use doca::dma::DOCAContext; 17 | use doca::{ DOCAMmap, DOCARegisteredMemory, BufferInventory, DOCAWorkQueue, DMAEngine, RawPointer, RawPointerMsg }; 18 | 19 | use nix::libc::*; 20 | 21 | use crate::bootstrap::connection::*; 22 | 23 | fn open_doca_device(pci_devs: &Vec) -> (Arc, usize) { 24 | let num_dev = pci_devs.len(); 25 | let mut local_mmap = DOCAMmap::new().unwrap(); 26 | 27 | for d in pci_devs.iter() { 28 
| let device = doca::device::open_device_with_pci(d.as_str()).unwrap(); 29 | let dev_idx = local_mmap.add_device(&device).unwrap(); 30 | } 31 | /* populate the buffer info to mmap */ 32 | (Arc::new(local_mmap), num_dev) 33 | } 34 | 35 | 36 | fn send_doca_config(addr: SocketAddr, num_dev: usize, mut doca_mmap: Arc, src_buf: RawPointer) { 37 | let mut stream = TcpStream::connect(addr).unwrap(); 38 | let mut doca_conn: DocaConnInfo = Default::default(); 39 | 40 | for i in 0..num_dev { 41 | let export_desc = unsafe { 42 | Arc::get_mut_unchecked(&mut doca_mmap).export(i).unwrap() 43 | }; 44 | doca_conn.exports.push(unsafe { 45 | slice::from_raw_parts_mut(export_desc.inner.as_ptr() as *mut _, export_desc.payload).to_vec() 46 | }); 47 | } 48 | doca_conn.buffers.push(src_buf); 49 | stream.write(DocaConnInfo::serialize(doca_conn).as_slice()).unwrap(); 50 | } 51 | 52 | pub fn perform_server_routine(runner: Arc>, args: CmdlineArgs) 53 | where T: Send + 'static + Sync + Copy 54 | { 55 | for i in 0..args.pci_dev.len() { 56 | println!("pcie dev 0: {}", &args.pci_dev[i]); 57 | } 58 | /* allocate local memory region */ 59 | let mut src_buffer = vec![0u8; args.random_space as usize].into_boxed_slice(); 60 | let src_region = RawPointer { 61 | inner: match args.huge_page { 62 | false => { 63 | NonNull::new(src_buffer.as_mut_ptr() as *mut _).unwrap() 64 | } 65 | true => { 66 | let capacity = round_up(args.random_space, 2 << 20); 67 | let data = unsafe { 68 | mmap( 69 | null_mut(), 70 | capacity as size_t, 71 | PROT_READ | PROT_WRITE, 72 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | MAP_HUGETLB, 73 | -1, 74 | 0 75 | ) 76 | }; 77 | 78 | if data == MAP_FAILED { 79 | panic!("Failed to create huge-page MR"); 80 | } 81 | NonNull::new(data).unwrap() 82 | } 83 | }, 84 | payload: args.random_space as usize, 85 | }; 86 | 87 | /* open all doca devices specified by the user and register the host memory region */ 88 | let (local_mmap, num_dev) = open_doca_device(&args.pci_dev); 89 | 
local_mmap.populate(src_region).unwrap(); 90 | 91 | /* and send the export_desc and src_buffer to dpu */ 92 | send_doca_config( 93 | args.listen_addr.parse().unwrap(), 94 | num_dev, 95 | local_mmap.clone(), 96 | src_region 97 | ); 98 | 99 | /* keep the server alive until runner stop */ 100 | while runner.running() { 101 | compiler_fence(Ordering::SeqCst); 102 | } 103 | 104 | // unmap/dealloc the buffer 105 | if args.huge_page { 106 | // unmap hugepages 107 | } else { 108 | // dealloc normal memory pages 109 | } 110 | 111 | info!("Server exit."); 112 | } -------------------------------------------------------------------------------- /benchs/one_sided_rdma/src/bootstrap/mod.rs: -------------------------------------------------------------------------------- 1 | //! This module encapsulates the bootstrap functions for the one-sided RDMA benchmark. It provides functions to bootstrap the client and server sides of the benchmark. The client functions are encapsulated in `client_construct` 2 | //! module and the server functions are encapsulated in `server_construct` module. 3 | //! The module also defines constants for the maximum number of clients and minimum server life. The `bootstrap_client` function is used to bootstrap 4 | //! the client side of the benchmark and the `bootstrap_server` function is used to bootstrap the server side of the benchmark. The module also imports necessary dependencies and defines a `BenchRunner` struct to run the benchmark. 5 | //! Example of using in bench code: 6 | //! ```rust 7 | //! let server = true; 8 | //! if server { 9 | //! bootstrap::bootstrap_server(); 10 | //! } else { 11 | //! bootstrap::bootstrap_client(); 12 | //! 
} 13 | //!``` 14 | 15 | mod client_construct; 16 | pub use client_construct::{ 17 | perform_client_routine, 18 | perform_client_doorbell_routine, 19 | perform_client_signaled_routine, 20 | perform_client_doorbell_signaled_routine, 21 | }; 22 | 23 | mod server_construct; 24 | pub use server_construct::perform_server_routine; 25 | 26 | use std::{ thread, time }; 27 | use std::time::Duration; 28 | use tokio::runtime::Runtime; 29 | 30 | use bench_util::args::*; 31 | use bench_util::*; 32 | 33 | use netbencher_core::{ 34 | CoordinatedReporterMaster, 35 | BenchRunner, 36 | SimpleBenchReporter, 37 | CoordinatedReporter, 38 | }; 39 | 40 | use log::*; 41 | 42 | // Client bootstrap function 43 | pub fn bootstrap_client(args: CmdlineArgs) { 44 | let mut runner = BenchRunner::new(args.threads.try_into().unwrap()); 45 | runner.run(|thread_id, runner, stat, args| { 46 | match (args.doorbell, args.signaled) { 47 | (false, false) => { 48 | perform_client_routine(thread_id, runner, stat, args); 49 | } 50 | (true, false) => { 51 | info!("features: doorbell"); 52 | perform_client_doorbell_routine(thread_id, runner, stat, args); 53 | } 54 | (false, true) => { 55 | info!("features: signaled"); 56 | perform_client_signaled_routine(thread_id, runner, stat, args); 57 | } 58 | (true, true) => { 59 | info!("features: doorbell,signaled"); 60 | perform_client_doorbell_signaled_routine(thread_id, runner, stat, args); 61 | } 62 | } 63 | }, args.clone()); 64 | 65 | let mut inner_reporter = SimpleBenchReporter::new_with_id(args.client_id.try_into().unwrap()); 66 | 67 | if args.report { 68 | Runtime::new() 69 | .unwrap() 70 | .block_on(async { 71 | let mut reporter = CoordinatedReporter::new( 72 | args.report_addr.parse().unwrap(), 73 | inner_reporter 74 | ).await.expect("failed to create the reporter"); 75 | 76 | // send a report to the master 77 | for epoch in 0..args.life { 78 | thread::sleep(time::Duration::from_secs(1)); 79 | runner.report_async(&mut reporter).await; 80 | } 81 | }); 82 | } 
else { 83 | for epoch in 0..args.life { 84 | thread::sleep(time::Duration::from_secs(1)); 85 | info!("{}", runner.report(&mut inner_reporter)); 86 | } 87 | } 88 | runner.stop().unwrap(); 89 | } 90 | 91 | // Server bootstrap function 92 | pub fn bootstrap_server(mut args: CmdlineArgs) { 93 | if args.life < MIN_SERVER_LIFE { 94 | args.life = MIN_SERVER_LIFE; 95 | } 96 | 97 | let mut runner = BenchRunner::new(1); 98 | runner.run(|thread_id, runner, stat, args| { perform_server_routine(runner, args); }, args.clone()); 99 | 100 | if args.report { 101 | Runtime::new() 102 | .unwrap() 103 | .block_on(async { 104 | let mut master = CoordinatedReporterMaster::new( 105 | MAX_CLIENTS, 106 | args.report_addr.parse().unwrap() 107 | ).await.expect("failed to create the master"); 108 | 109 | master 110 | .report_event_loop( 111 | Duration::from_secs(args.life.into()), 112 | Duration::from_secs(1) 113 | ).await 114 | .expect("event loop report error"); 115 | }); 116 | } else { 117 | thread::sleep(Duration::from_secs(args.life.into())); 118 | } 119 | runner.stop().unwrap(); 120 | } -------------------------------------------------------------------------------- /benchs/bench_util/src/doorbell/doorbell.rs: -------------------------------------------------------------------------------- 1 | use KRdmaKit::rdma_shim::bindings::*; 2 | 3 | /// We hardcoded the maximum batch size. 4 | /// Typically, NIC doesn't expect a very large batch size. 5 | pub const MAX_BATCH_SZ: usize = 64; 6 | 7 | ///A struct to help send doorbell. 
8 | /// It contains wrs and sges to save requests 9 | /// 10 | pub struct DoorbellHelper { 11 | pub wrs: [ibv_send_wr; MAX_BATCH_SZ], 12 | pub sges: [ibv_sge; MAX_BATCH_SZ], 13 | pub capacity: usize, 14 | cur_idx: isize, 15 | } 16 | 17 | impl DoorbellHelper { 18 | pub fn new(capacity: usize) -> Self { 19 | Self { 20 | capacity, 21 | cur_idx: -1, 22 | wrs: [Default::default(); MAX_BATCH_SZ], 23 | sges: [ibv_sge { 24 | addr: 0, 25 | length: 0, 26 | lkey: 0, 27 | }; MAX_BATCH_SZ], 28 | } 29 | } 30 | 31 | /// Create a DoorbellHelp, and initailize all its wrs and sges 32 | /// # Arguments 33 | /// - `capacity` is the max batch size of the doorbll 34 | /// - `op` is the ib operation shared by all entries in this doorbell 35 | pub fn create(capacity: usize, op: u32) -> Self { 36 | let mut ret = Self { 37 | capacity, 38 | cur_idx: -1, 39 | wrs: [Default::default(); MAX_BATCH_SZ], 40 | sges: [ibv_sge { 41 | addr: 0, 42 | length: 0, 43 | lkey: 0, 44 | }; MAX_BATCH_SZ], 45 | }; 46 | ret.init(op); 47 | ret 48 | } 49 | 50 | #[inline] 51 | pub fn init(&mut self, op: u32) { 52 | for i in 0..self.capacity { 53 | self.wrs[i].opcode = op; 54 | self.wrs[i].num_sge = 1; 55 | self.wrs[i].next = &mut self.wrs[(i + 1) % self.capacity] as *mut ibv_send_wr; 56 | self.wrs[i].sg_list = &mut self.sges[i] as *mut ibv_sge; 57 | } 58 | } 59 | 60 | #[inline] 61 | pub fn sanity_check(&self) -> bool { 62 | let mut ret = true; 63 | for i in 0..self.capacity { 64 | let sge_ptr = &(self.sges[i]) as *const ibv_sge; 65 | let wr_sg_list = self.wrs[i].sg_list; 66 | ret &= (sge_ptr as u64) == (wr_sg_list as u64); 67 | } 68 | ret 69 | } 70 | 71 | /// Return current batching size 72 | #[inline] 73 | pub fn size(&self) -> isize { 74 | self.cur_idx + 1 75 | } 76 | #[inline] 77 | pub fn is_empty(&self) -> bool { 78 | self.size() <= 0 79 | } 80 | #[inline] 81 | pub fn empty(&mut self) { 82 | self.cur_idx = -1; 83 | } 84 | #[inline] 85 | pub fn is_full(&self) -> bool { 86 | self.size() >= self.capacity as isize 
87 | } 88 | 89 | /// Get the next doorbell entry 90 | /// # Return value 91 | /// - `true` means the doorbell batching size is less than `capacity`, it is ok to add a new doorbell 92 | /// - `false` means doorbell is full, cannot add new entry 93 | /// 94 | /// User shall check its return value 95 | #[inline] 96 | pub fn next(&mut self) -> bool { 97 | if self.is_full() { 98 | return false; 99 | } 100 | self.cur_idx += 1; 101 | true 102 | } 103 | } 104 | 105 | impl DoorbellHelper { 106 | // Before flushing the doorbell, we must freeze it to prevent adding 107 | #[inline] 108 | pub fn freeze(&mut self) { 109 | assert!(!self.is_empty()); // should not be empty 110 | self.cur_wr().next = core::ptr::null_mut(); 111 | } 112 | 113 | // After flushing the doorbell, unfreeze it 114 | #[inline] 115 | pub fn freeze_done(&mut self) { 116 | assert!(!self.is_empty()); 117 | if self.cur_idx == (self.capacity - 1) as isize { 118 | self.wrs[self.cur_idx as usize].next = &mut self.wrs[0] as *mut ib_send_wr; 119 | } else { 120 | self.wrs[self.cur_idx as usize].next = 121 | &mut self.wrs[(self.cur_idx + 1) as usize] as *mut ib_send_wr; 122 | } 123 | } 124 | 125 | #[inline] 126 | pub fn clear(&mut self) { 127 | self.freeze_done(); 128 | self.cur_idx = -1; 129 | } 130 | // Return the ptr to current doorbell entry's wr 131 | #[inline] 132 | pub fn cur_wr(&mut self) -> &mut ib_rdma_wr { 133 | return if self.is_empty() { 134 | &mut self.wrs[0] 135 | } else { 136 | &mut self.wrs[self.cur_idx as usize] 137 | }; 138 | } 139 | // Return the ptr to current doorbell entry's sge 140 | #[inline] 141 | pub fn cur_sge(&mut self) -> &mut ibv_sge { 142 | return if self.is_empty() { 143 | &mut self.sges[0] 144 | } else { 145 | &mut self.sges[self.cur_idx as usize] 146 | }; 147 | } 148 | 149 | #[inline] 150 | pub fn first_wr_ptr(&mut self) -> *mut ib_send_wr { 151 | &mut self.wrs[0] as *mut ibv_send_wr 152 | } 153 | // Return the ptr to specified doorbell entry's wr 154 | // **WRRN**: No check for idx. 
The caller has to take care of it by himself 155 | #[inline] 156 | pub fn get_wr_ptr(&mut self, idx: usize) -> *mut ib_rdma_wr { 157 | &mut self.wrs[idx] as *mut ibv_send_wr 158 | } 159 | // Return the ptr to specified doorbell entry's sge 160 | #[inline] 161 | pub fn get_sge_ptr(&mut self, idx: usize) -> *mut ibv_sge { 162 | &mut self.sges[idx] as *mut ibv_sge 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /benchs/docs/dma.md: -------------------------------------------------------------------------------- 1 | # DOCA DMA 2 | 3 | ## Quick start 4 | 5 | ### Build 6 | 7 | DOCA DMA required a DOCA environment, you can refer to our [rust-doca](https://ipads.se.sjtu.edu.cn:1312/distributed-rdma-serverless/smartnic-project/rust-doca) or [DOCA SDK](https://docs.nvidia.com/doca/sdk/index.html) for DOCA installation. 8 | 9 | To build all binary files required for our benchmark, run: 10 | ```bash 11 | cd bench/doca_dma 12 | cargo build --release 13 | ``` 14 | 15 | If your OFED version is >= 5.0, we offer a cargo feature to enable your building: 16 | ```bash 17 | cargo build --release --features "OFED_5_4" 18 | ``` 19 | 20 | If to build on DPU SoC, another feature is required: 21 | ```bash 22 | cargo build --release --features "OFED_5_4 ARM" 23 | ``` 24 | 25 | Having the similar output on the terminal means you have succeeded: 26 | ```bash 27 | Finished release [optimized] target(s) in 4.09s 28 | ``` 29 | 30 | All binary files can be found in `bench/target/release`. 31 | 32 | ### Run 33 | 34 | In this section, we will talk about how to run a RDMA test manually. To run our test, one server and multiple clients are required. All of them must be installed with Infiniband RDMA NICs. You can refer to our paper for the hardware environment. 
35 | 36 | At the server's terminal, type in: 37 | ``` 38 | ./one_sided_rdma --server --addr ${server_ip}:${listen_port} 39 | ``` 40 | 41 | User must confirm availability of `server_ip` and `listen_port`, so that our benchmarks can build RDMA connections based on TCP. 42 | 43 | At each client's terminal, type in: 44 | ``` 45 | ./one_sided_rdma --addr ${server_ip}:${listen_port} 46 | ``` 47 | The `server_ip` and `listen_port` is just the same as server's. 48 | 49 | The default mode of bench is WRITE. If clients print logs similar to the following, then you have succeeded: 50 | ``` 51 | 06:54:10 [INFO] @0 Throughput: 7.72 Mops/s, Avg Latency: 0.13 µs 52 | 06:54:11 [INFO] @0 Throughput: 7.95 Mops/s, Avg Latency: 0.13 µs 53 | 06:54:12 [INFO] @0 Throughput: 7.96 Mops/s, Avg Latency: 0.13 µs 54 | ... 55 | 06:54:10 [INFO] @0 Throughput: 7.72 Mops/s, Avg Latency: 0.13 µs 56 | 06:54:11 [INFO] @0 Throughput: 7.95 Mops/s, Avg Latency: 0.13 µs 57 | 06:54:12 [INFO] @0 Throughput: 7.96 Mops/s, Avg Latency: 0.13 µs 58 | ... 59 | ``` 60 | 61 | ### Common arguments 62 | 63 | Our benchmark support configuring tests with command line arguments. Here are some common ones: 64 | 65 | |Client side flag|Description|Default| 66 | |---|---|---| 67 | |--payload|Payload(byte) for RDMA requests.|32| 68 | |--client-id|ID of the benchmarking client. Clients will generate different access patterns according to their client ids.|0| 69 | |--threads|Threads number.|1| 70 | |--life|How long will the client live(seconds).|15| 71 | |--huge-page|Whether or not to use huge page for memory region.|N/A| 72 | 73 | |Server side flag|Description|Default| 74 | |---|---|---| 75 | |--huge-page|Whether or not to use huge page for memory region.|N/A| 76 | |--life|How long will the server live(seconds).|30| 77 | 78 | You can check more flags with `--help`. 79 | 80 | ### Switch to READ mode 81 | 82 | For one-sided tests, we use `--read` flag to specify READ test. 
So a read client shall use the command: 83 | 84 | ```bash 85 | ./one_sided_rdma --addr ${server_ip}:${listen_port} --read 86 | ``` 87 | 88 | ### Get the average latency 89 | 90 | By default, our tests are targeted at maximizing throughput. 91 | We can switch to minimizing latency by using the following flags at the client: 92 | 93 | ```bash 94 | ./one_sided_rdma --addr ${server_ip}:${listen_port} --threads 1 --factor 1 --latency 95 | ``` 96 | 97 | You can then specify the READ test with `--read`, and modify payload with `--payload`. 98 | 99 | ### Change NIC device 100 | 101 | By default, our tests use the first NIC device found. Sometimes, the RNIC you want to test might not be not the first. In these cases, we offer `--nic-idx` to allow user to choose NIC device. 102 | 103 | You can check your NIC device id using: 104 | 105 | ```bash 106 | ibv_devinfo 107 | ``` 108 | 109 | Client can use NIC n by typing: 110 | 111 | ```bash 112 | ./one_sided_rdma --addr ${server_ip}:${listen_port} --nic-idx n 113 | ``` 114 | 115 | Similarly, server can use the following: 116 | 117 | ```bash 118 | ./one_sided_rdma --server --addr ${server_ip}:${listen_port} --nic-idx n 119 | ``` 120 | 121 | ### Accelerate posting w/ doorbell 122 | 123 | In one-sided tests, client can apply doorbell batching to accelerating posting, but according to our experience, we suggest you use it carefully in some cases. You can refer to our paper for details. 124 | 125 | The doorbell batching is activated by typing: 126 | 127 | ```bash 128 | ./one_sided_rdma --addr ${server_ip}:${listen_port} --doorbell 129 | ``` 130 | 131 | If you want to change the batch size (i.e. factor) or the doorbell size (i.e. db_size), remember to make sure that `db_size <= factor`. 132 | 133 | ### Add reporting for clients 134 | 135 | Collecting throughput or latency logs from multiple clients is troublesome. We provide a optional report function, which will collect each client's average throughput and latency and merge them. 
To use this function, first choose an unused port at the server, denoted as `report_port`; this port must be different from `listen_port`. Add this argument to the server with the `--report-addr` flag:
35 | 36 | At SoC's terminal, type in: 37 | ```bash 38 | ./doca_rdma --addr ${server_ip}:${listen_port} -p ${pcie_dev} 39 | ``` 40 | 41 | - --addr: User must confirm availability of `server_ip` and `listen_port`, so that our benchmarks can build RDMA connections based on TCP. 42 | 43 | - -p: The `pcie_dev` is the PCIe dev id of DPU shown in the output of `lspci`. If you are using a 2-port DPU like us, just pick one device as the argument. Our experience shows that the number of devices used here will not affect performance. 44 | 45 | This command will start a SoC instance, which will wait until the host instance is ready. Then, it will post DMA read/write requests to host. 46 | 47 | The default mode of bench is WRITE. If the SoC instance prints logs similar to the following, then you have succeeded: 48 | ```bash 49 | 06:26:59 [INFO] @0 Throughput: 3.971 Mops/s, Avg Latency: 0.25 µs 50 | 06:27:00 [INFO] @0 Throughput: 4.163 Mops/s, Avg Latency: 0.24 µs 51 | 06:27:01 [INFO] @0 Throughput: 4.164 Mops/s, Avg Latency: 0.24 µs 52 | ... 53 | ``` 54 | 55 | 56 | 57 | Then, at the host's terminal, type in: 58 | ```bash 59 | ./doca_rdma --server --addr ${server_ip}:${listen_port} -p ${pcie_dev} 60 | ``` 61 | 62 | - --addr User must make sure the `server_ip` and `listen_port` are consistent with SoC's. 63 | 64 | - -p The `pcie_dev` here represent the same PCIe device with that of SoC. 65 | 66 | This command will start the host instance, which will accept connection from SoC and run silently. The host instance should run than SoC, and our default setting is as follows: 67 | 68 | - SoC: `--life 15`, in seconds 69 | - host: `--life 30`, in seconds 70 | 71 | ### Common arguments 72 | 73 | The command line arguments of DOCA DMA are similar to RDMA tests, you can check them with `--help` or `-h`. 74 | 75 | User shall provide arguments following these rules: 76 | 77 | 1. `threads * thread-gap <= random-space` 78 | 2. `payload <= random-space` 79 | 3. `payload <= thread-gap` 80 | 4. 
`batch-size * payload <= local-mr` 81 | 82 | Otherwise, our program will try its best to rewrite the arguments to keep the invariants. 83 | 84 | ### Switch to READ mode 85 | 86 | For DMA tests, we also use `--read` flag to specify READ test. So a read client shall use the command: 87 | 88 | ```bash 89 | ./doca_rdma --addr ${server_ip}:${listen_port} -p ${pcie_dev} --read 90 | ``` 91 | 92 | ### Get the average latency 93 | 94 | By default, our tests are targeted at maximizing throughput. 95 | We can switch to minimizing latency by using the following flags at the client: 96 | 97 | ```bash 98 | ./doca_rdma --addr ${server_ip}:${listen_port} -p ${pcie_dev} --threads 1 --batch-size 1 99 | ``` 100 | 101 | You can then specify the READ test with `--read`, and modify payload with `--payload`. 102 | 103 | ### Modify the batch size 104 | 105 | We found that sending large batch of large DMA requests is very error-prone, we suggest you decrease the batch size as the payload grows: 106 | 107 | ```bash 108 | ./doca_rdma --addr ${server_ip}:${listen_port} -p ${pcie_dev} --batch-size n 109 | ``` 110 | 111 | ### Modify the host region size 112 | 113 | With larger payload, larger host memory region is required. User can modify the default host region to R bytes with the following: 114 | ```bash 115 | ./doca_rdma --server --addr ${server_ip}:${listen_port} -p ${pcie_dev} --random-space R 116 | ``` 117 | 118 | The host's `random_space` shall be equal to that of SoC. 119 | 120 | ### Fine-grained randomization 121 | 122 | By default, each client thread will access the whole random region, but with different seed. 
We allow user to separate an area for each thread, and the thread will make sure its random access is limited in the area: 123 | 124 | ```bash 125 | ./doca_rdma --server --addr ${server_ip}:${listen_port} -p ${pcie_dev} --fixed 126 | ``` 127 | 128 | The area is of `thread_gap` size, our bench makes sure that `thread_gap >= payload`, you can increase the thread_gap with `--thread-gap`, but you need to check that `threads * thread_gap <= random_space`. 129 | 130 | ## Results for reference 131 | 132 | ### DMA read 133 | 134 | |Payload|Peek throughput (M reqs/sec)| 135 | |---|---| 136 | |16|10.5| 137 | |64|10.5| 138 | |256|10.5| 139 | |1024|6.82| 140 | |4096|3.87| 141 | |16384|1.5| 142 | |65536|0.39| 143 | 144 | ### DMA write 145 | 146 | |Payload|Peek throughput (M reqs/sec)| 147 | |---|---| 148 | |16|10.19| 149 | |64|10.19| 150 | |256|10.19| 151 | |1024|9.02| 152 | |4096|3.71| 153 | |16384|0.94| 154 | |65536|0.23| -------------------------------------------------------------------------------- /benchs/docs/one_sided_rdma.md: -------------------------------------------------------------------------------- 1 | # ONE SIDED RDMA 2 | 3 | ## Quick start 4 | 5 | ### Build 6 | 7 | To build all binary files required for our benchmark, run: 8 | ```bash 9 | cd bench/one_sided_rdma 10 | cd bench/one_sided_rdma 11 | cargo build --release 12 | ``` 13 | 14 | If your OFED version is >= 5.0, we offer a cargo feature to enable your building: 15 | ```bash 16 | cargo build --release --features "OFED_5_4" 17 | ``` 18 | 19 | If to build on DPU SoC, another feature is required: 20 | ```bash 21 | cargo build --release --features "OFED_5_4 ARM" 22 | ``` 23 | 24 | Having the similar output on the terminal means you have succeeded: 25 | 26 | ```bash 27 | Finished release [optimized] target(s) in 4.09s 28 | ``` 29 | 30 | All binary files can be found in `bench/target/release`. 31 | 32 | ### Run 33 | 34 | In this section, we will talk about how to run a RDMA test manually. 
To run our test, one server and multiple clients are required. All of them must be installed with Infiniband RDMA NICs. You can refer to our paper for the hardware environment. 35 | 36 | At the server's terminal, type in: 37 | ```bash 38 | ./one_sided_rdma --server --addr ${server_ip}:${listen_port} 39 | ``` 40 | 41 | User must confirm availability of `server_ip` and `listen_port`, so that our benchmarks can build RDMA connections based on TCP. 42 | 43 | At each client's terminal, type in: 44 | ```bash 45 | ./one_sided_rdma --addr ${server_ip}:${listen_port} 46 | ``` 47 | The `server_ip` and `listen_port` is just the same as server's. 48 | 49 | The default mode of bench is WRITE. If clients print logs similar to the following, then you have succeeded: 50 | ```bash 51 | 06:54:10 [INFO] @0 Throughput: 7.72 Mops/s, Avg Latency: 0.13 µs 52 | 06:54:11 [INFO] @0 Throughput: 7.95 Mops/s, Avg Latency: 0.13 µs 53 | 06:54:12 [INFO] @0 Throughput: 7.96 Mops/s, Avg Latency: 0.13 µs 54 | ... 55 | 06:54:10 [INFO] @0 Throughput: 7.72 Mops/s, Avg Latency: 0.13 µs 56 | 06:54:11 [INFO] @0 Throughput: 7.95 Mops/s, Avg Latency: 0.13 µs 57 | 06:54:12 [INFO] @0 Throughput: 7.96 Mops/s, Avg Latency: 0.13 µs 58 | ... 59 | ``` 60 | 61 | ### Common arguments 62 | 63 | Our benchmark support configuring tests with command line arguments. Here are some common ones: 64 | 65 | |Client side flag|Description|Default| 66 | |---|---|---| 67 | |--payload|Payload(byte) for RDMA requests.|32| 68 | |--client-id|ID of the benchmarking client. 
Clients will generate different access patterns according to their client ids.|0| 69 | |--threads|Threads number.|1| 70 | |--life|How long will the client live(seconds).|15| 71 | |--huge-page|Whether or not to use huge page for memory region.|N/A| 72 | 73 | |Server side flag|Description|Default| 74 | |---|---|---| 75 | |--huge-page|Whether or not to use huge page for memory region.|N/A| 76 | |--life|How long will the server live(seconds).|30| 77 | 78 | You can check more flags with `--help`. 79 | 80 | User shall provide arguments following these rules: 81 | 82 | 1. `threads * thread-gap <= random-space` 83 | 2. `payload <= random-space` 84 | 3. `payload <= thread-gap` 85 | 4. `factor * payload <= local-mr` 86 | 87 | Otherwise, our program will try its best to rewrite the arguments to keep the invariants. 88 | 89 | ### Switch to READ mode 90 | 91 | For one-sided tests, we use `--read` flag to specify READ test. So a read client shall use the command: 92 | 93 | ```bash 94 | ./one_sided_rdma --addr ${server_ip}:${listen_port} --read 95 | ``` 96 | 97 | ### Get the average latency 98 | 99 | By default, our tests are targeted at maximizing throughput. 100 | We can switch to minimizing latency by using the following flags at the client: 101 | 102 | ```bash 103 | ./one_sided_rdma --addr ${server_ip}:${listen_port} --threads 1 --factor 1 --latency-test 104 | ``` 105 | 106 | You can then specify the READ test with `--read`, and modify payload with `--payload`. 107 | 108 | ### Change NIC device 109 | 110 | By default, our tests use the first NIC device found. Sometimes, the RNIC you want to test might not be not the first. In these cases, we offer `--nic-idx` to allow user to choose NIC device. 
111 | 112 | You can check your NIC device id using: 113 | 114 | ```bash 115 | ibv_devinfo 116 | ``` 117 | 118 | Client can use NIC n by typing: 119 | 120 | ```bash 121 | ./one_sided_rdma --addr ${server_ip}:${listen_port} --nic-idx n 122 | ``` 123 | 124 | Similarly, server can use the following: 125 | 126 | ```bash 127 | ./one_sided_rdma --server --addr ${server_ip}:${listen_port} --nic-idx n 128 | ``` 129 | 130 | ### Accelerate posting w/ doorbell 131 | 132 | In one-sided tests, client can apply doorbell batching to accelerating posting, but according to our experience, we suggest you use it carefully in some cases. You can refer to our paper for details. 133 | 134 | The doorbell batching is activated by typing: 135 | 136 | ```bash 137 | ./one_sided_rdma --addr ${server_ip}:${listen_port} --doorbell 138 | ``` 139 | 140 | If you want to change the batch size (i.e. factor) or the doorbell size (i.e. db_size), remember to make sure that `db_size <= factor`. 141 | 142 | ### Add reporting for clients 143 | 144 | Collecting throughput or latency logs from multiple clients is troublesome. We provide a optional report function, which will collect each client's average throughput and latency and merge them. 145 | 146 | To use this function, first choose a unused port at server, denoted as `report_port`, this port must be different from `listen_port`. 
Add this argument to server with `--report-addr` flag: 147 | 148 | ```bash 149 | ./one_sided_rdma --server --addr ${server_ip}:${listen_port} --report --report-addr ${server_ip}:${report_port} 150 | ``` 151 | 152 | And for each client, use: 153 | 154 | ```bash 155 | ./one_sided_rdma --addr ${server_ip}:${listen_port} --report --report-addr ${server_ip}:${report_port} 156 | ``` -------------------------------------------------------------------------------- /benchs/docs/two_sided_rdma.md: -------------------------------------------------------------------------------- 1 | # TWO SIDED RDMA 2 | # TWO SIDED RDMA 3 | 4 | ## Quick start 5 | 6 | ### Build 7 | 8 | Read [one_sided_rdma](one_sided_rdma.md) for reference. 9 | Read [one_sided_rdma](one_sided_rdma.md) for reference. 10 | 11 | Our two_sided bench is in `bench/two_sided_rdma` 12 | 13 | Our two_sided bench is in `bench/two_sided_rdma` 14 | 15 | ### Run 16 | 17 | In this section, we will talk about how to run a SEND/RECV test manually. To run our test, one server and multiple clients are required. All of them must be installed with Infiniband RDMA NICs. You can refer to our paper for the hardware environment. 18 | In this section, we will talk about how to run a SEND/RECV test manually. To run our test, one server and multiple clients are required. All of them must be installed with Infiniband RDMA NICs. You can refer to our paper for the hardware environment. 19 | 20 | At the server's terminal, type in: 21 | ```bash 22 | ./two_sided_rdma --server --addr ${server_ip}:${listen_port} 23 | ``` 24 | 25 | User must confirm availability of `server_ip` and `listen_port`, so that our benchmarks can build RDMA connections based on TCP. 26 | 27 | At each client's terminal, type in: 28 | ```bash 29 | ./two_sided_rdma --addr ${server_ip}:${listen_port} 30 | ``` 31 | The `server_ip` and `listen_port` is just the same as server's. 
32 | 33 | If clients print logs similar to the following, then you have succeeded: 34 | 35 | ```bash 36 | 08:51:59 [INFO] @0 Throughput: 5.20 Mops/s, Avg Latency: 0.19 µs 37 | 08:52:00 [INFO] @0 Throughput: 5.22 Mops/s, Avg Latency: 0.19 µs 38 | 08:52:01 [INFO] @0 Throughput: 5.21 Mops/s, Avg Latency: 0.19 µs 39 | ... 40 | ``` 41 | 42 | ### Common arguments 43 | 44 | Our benchmark support configuring tests with command line arguments. Here are some common ones: 45 | 46 | |Client side flag|Description|Default| 47 | |---|---|---| 48 | |--payload|Payload(byte) for RDMA requests.|32| 49 | |--client-id|ID of the benchmarking client. Clients will generate different access patterns according to their client ids.|0| 50 | |--client-id|ID of the benchmarking client. Clients will generate different access patterns according to their client ids.|0| 51 | |--threads|Threads number.|1| 52 | |--life|How long will the client live(seconds).|15| 53 | |--huge-page|Whether or not to use huge page for memory region.|N/A| 54 | |--huge-page|Whether or not to use huge page for memory region.|N/A| 55 | 56 | |Server side flag|Description|Default| 57 | |---|---|---| 58 | |--huge-page|Whether or not to use huge page for memory region.|N/A| 59 | |--life|How long will the server live(seconds).|30| 60 | 61 | You can check more flags with `--help`. 62 | 63 | User shall provide arguments following these rules: 64 | 65 | 1. `threads * thread-gap <= random-space` 66 | 2. `payload <= random-space` 67 | 3. `payload <= thread-gap` 68 | 4. `factor * payload <= local-mr` 69 | 70 | Otherwise, our program will try its best to rewrite the arguments to keep the invariants. 71 | 72 | ### Get the average latency 73 | 74 | By default, our tests are targeted at maximizing throughput. 
75 | We can switch to minimizing latency by using the following flags at the client: 76 | 77 | ```bash 78 | ./two_sided_rdma --addr ${server_ip}:${listen_port} --threads 1 --factor 1 --latency-test 79 | ``` 80 | 81 | And the server side: 82 | And the server side: 83 | 84 | ```bash 85 | ./two_sided_rdma --server --addr ${server_ip}:${listen_port} --threads 1 --latency-test 86 | ``` 87 | 88 | ### Change NIC device 89 | 90 | By default, our tests use the first NIC device found. Sometimes, the RNIC you want to test might not be not the first. In these cases, we offer `--nic-idx` to allow user to choose NIC device. 91 | 92 | You can check your NIC device id using 93 | 94 | ```bash 95 | ibv_devinfo 96 | ``` 97 | 98 | Client can use NIC n by typing: 99 | 100 | ```bash 101 | ./two_sided_rdma --addr ${server_ip}:${listen_port} --nic-idx n 102 | ``` 103 | 104 | Similarly, server can use the following: 105 | 106 | ```bash 107 | ./two_sided_rdma --server --addr ${server_ip}:${listen_port} --nic-idx n 108 | ``` 109 | 110 | In our `two_sided_client` and `two_sided_server`, qps can be distributed evenly across multiple network ports, you can specify the number of network ports to use with `--nic-num n`. 111 | 112 | For example: 113 | 114 | ```bash 115 | # client-side 116 | ./two_sided_rdma --addr ${server_ip}:${listen_port} --nic-num 2 117 | ``` 118 | 119 | ```bash 120 | # server-side 121 | ./two_sided_rdma --server --addr ${server_ip}:${listen_port} --nic-num 2 122 | ``` 123 | 124 | ### Accelerate posting w/ doorbell 125 | 126 | In two-sided tests, client and server can both apply doorbell batching to accelerating posting, but according to our experience, we suggest you use it carefully in some cases. You can refer to our paper for details. 127 | 128 | The doorbell batching is activated by typing: 129 | 130 | ```bash 131 | ./two_sided_rdma --addr ${server_ip}:${listen_port} --doorbell 132 | ``` 133 | 134 | If you want to change the batch size (i.e. 
factor) or the doorbell size (i.e. db_size), remember to make sure that `db_size <= factor`. 135 | 136 | ### Add reporting for clients 137 | 138 | Collecting throughput or latency logs from different clients is troublesome. We provide a optional report function, which will collect each client's average throughput and latency and merge them. 139 | 140 | To use this function, first choose a unused port at server, denoted as `report_port`, this port must be different from `listen_port`. Add this argument to server with `--report-addr` flag: 141 | 142 | ```bash 143 | ./two_sided_rdma --server --addr ${server_ip}:${listen_port} --report --report-addr ${server_ip}:${report_port} 144 | ``` 145 | 146 | And for each client, use: 147 | 148 | ```bash 149 | ./two_sided_rdma --addr ${server_ip}:${listen_port} --report --report-addr ${server_ip}:${report_port} 150 | ``` 151 | 152 | P.S. This flag is not recommended when DPU SoC is used as a server. -------------------------------------------------------------------------------- /benchs/two_sided_rdma/src/bootstrap/mod.rs: -------------------------------------------------------------------------------- 1 | mod client_construct; 2 | pub use client_construct::{ 3 | perform_client_routine, 4 | perform_client_doorbell_routine, 5 | perform_client_profile_routine, 6 | }; 7 | 8 | mod server_construct; 9 | pub use server_construct::{ perform_server_routine, perform_server_doorbell_routine }; 10 | 11 | use std::{ thread, time }; 12 | use std::net::{ SocketAddr, TcpStream }; 13 | use std::time::Duration; 14 | use std::collections::HashMap; 15 | use std::sync::{ Arc, RwLock }; 16 | 17 | use tokio::runtime::Runtime; 18 | 19 | use bench_util::args::*; 20 | use bench_util::ud_endpoint::*; 21 | use bench_util::*; 22 | use bench_util::ud_manager::*; 23 | 24 | use netbencher_core::{ 25 | CoordinatedReporterMaster, 26 | BenchRunner, 27 | SimpleBenchReporter, 28 | CoordinatedReporter, 29 | }; 30 | 31 | use log::*; 32 | 33 | pub fn 
bootstrap_client(args: CmdlineArgs) { 34 | let listen_addr: SocketAddr = args.listen_addr.parse().unwrap(); 35 | let mut socket = TcpStream::connect(listen_addr).unwrap(); 36 | 37 | // create and connect all UD qps for all threads of client 38 | let (client_qps, server_eps) = bootstrap_uds( 39 | &mut socket, 40 | args.nic_idx, 41 | args.nic_num, 42 | args.threads as usize, 43 | args.client_id 44 | ); 45 | 46 | let mut runner = BenchRunner::new(args.threads.try_into().unwrap()); 47 | runner.run(move |thread_id, runner, stat, args| { 48 | match (args.profile, args.doorbell) { 49 | (false, false) => { 50 | perform_client_routine( 51 | thread_id, 52 | runner, 53 | stat, 54 | client_qps[thread_id].clone(), 55 | server_eps[thread_id].clone(), 56 | args 57 | ); 58 | } 59 | (false, true) => { 60 | info!("features: doorbell"); 61 | perform_client_doorbell_routine( 62 | thread_id, 63 | runner, 64 | stat, 65 | client_qps[thread_id].clone(), 66 | server_eps[thread_id].clone(), 67 | args 68 | ); 69 | } 70 | (true, false) => { 71 | info!("features: profile"); 72 | perform_client_profile_routine( 73 | thread_id, 74 | runner, 75 | stat, 76 | client_qps[thread_id].clone(), 77 | server_eps[thread_id].clone(), 78 | args 79 | ); 80 | } 81 | (true, true) => { 82 | warn!("We dont support profiling doorbell send for now!"); 83 | } 84 | } 85 | }, args.clone()); 86 | 87 | let mut inner_reporter = SimpleBenchReporter::new_with_id(args.client_id.try_into().unwrap()); 88 | 89 | if args.report { 90 | Runtime::new() 91 | .unwrap() 92 | .block_on(async { 93 | let mut reporter = CoordinatedReporter::new( 94 | args.report_addr.parse().unwrap(), 95 | inner_reporter 96 | ).await.expect("failed to create the reporter"); 97 | 98 | // send a report to the master 99 | for epoch in 0..args.life { 100 | thread::sleep(time::Duration::from_secs(1)); 101 | runner.report_async(&mut reporter).await; 102 | } 103 | }); 104 | } else { 105 | for epoch in 0..args.life { 106 | 
thread::sleep(time::Duration::from_secs(1)); 107 | info!("{}", runner.report(&mut inner_reporter)); 108 | } 109 | } 110 | runner.stop().unwrap(); 111 | } 112 | 113 | pub fn bootstrap_server(mut args: CmdlineArgs) { 114 | if args.life < MIN_SERVER_LIFE { 115 | args.life = MIN_SERVER_LIFE; 116 | } 117 | 118 | let conn_meta: Arc>>> = Default::default(); 119 | let listen_addr: SocketAddr = args.listen_addr.parse().unwrap(); 120 | // Create UD qps that are automately assigned to each NIC port 121 | // After bootstraping, server is ready to be connected 122 | let (qps, metas) = bootstrap_ud_server(args.threads as usize, args.nic_idx, args.nic_num); 123 | 124 | // create a copy for closure 125 | let conn_meta_ptr = conn_meta.clone(); 126 | let mut runner = BenchRunner::new(args.threads.try_into().unwrap()); 127 | runner.run(move |thread_id, runner, stat, args| { 128 | match args.doorbell { 129 | false => { 130 | perform_server_routine(runner, qps[thread_id].clone(), conn_meta_ptr.clone(), args); 131 | } 132 | true => { 133 | info!("features: doorbell"); 134 | perform_server_doorbell_routine(runner, qps[thread_id].clone(), conn_meta_ptr.clone(), args); 135 | } 136 | } 137 | }, args.clone()); 138 | 139 | // serialize meta infos of server's UD qps into message 140 | let metas_msg = marshal_batch(metas, 0); 141 | // wait for each client's connect message and the final TERMINATE_SIG 142 | let ud_manager = UdManager::new(listen_addr, conn_meta, metas_msg); 143 | let listen_thread = ud_manager.spawn_server_listener(); 144 | 145 | // start to collecting reports 146 | if args.report { 147 | Runtime::new() 148 | .unwrap() 149 | .block_on(async { 150 | let mut master = CoordinatedReporterMaster::new( 151 | MAX_CLIENTS, 152 | args.report_addr.parse().unwrap() 153 | ).await.expect("failed to create the master"); 154 | 155 | master 156 | .report_event_loop( 157 | Duration::from_secs(args.life.into()), 158 | Duration::from_secs(1) 159 | ).await 160 | .expect("event loop report error"); 
161 | }); 162 | } else { 163 | thread::sleep(Duration::from_secs(args.life.into())); 164 | } 165 | 166 | // stop listening and exit 167 | ud_manager.stop_listen(); 168 | listen_thread.join(); 169 | runner.stop().unwrap(); 170 | info!("Server exit."); 171 | } -------------------------------------------------------------------------------- /benchs/bench_util/src/doorbell/recv_doorbell.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use KRdmaKit::{MemoryRegion, QueuePair, DatapathError}; 3 | use KRdmaKit::rdma_shim::bindings::*; 4 | use core::iter::TrustedRandomAccessNoCoerce; 5 | use core::ops::Range; 6 | 7 | pub(self) const DEFAULT_BATCH_SZ: usize = 64; 8 | 9 | /* A structure to help recv doorbell. */ 10 | pub struct RecvDoorbell { 11 | /// contains recv wrs and sges to save requests 12 | pub wrs: [ibv_recv_wr; DEFAULT_BATCH_SZ], 13 | pub sges: [ibv_sge; DEFAULT_BATCH_SZ], 14 | 15 | pub cur_idx: isize, 16 | pub capacity: usize, 17 | } 18 | 19 | impl RecvDoorbell { 20 | pub fn create(capacity: usize) -> Self { 21 | let ret = Self { 22 | capacity, 23 | cur_idx: -1, 24 | wrs: [Default::default(); DEFAULT_BATCH_SZ], 25 | sges: [ibv_sge { 26 | addr: 0, 27 | length: 0, 28 | lkey: 0, 29 | }; DEFAULT_BATCH_SZ], 30 | }; 31 | ret 32 | } 33 | 34 | #[inline] 35 | pub fn init(&mut self) { 36 | for i in 0..self.capacity { 37 | self.wrs[i].num_sge = 1; 38 | self.wrs[i].next = &mut self.wrs[(i + 1) % self.capacity] as *mut ibv_recv_wr; 39 | self.wrs[i].sg_list = &mut self.sges[i] as *mut ibv_sge; 40 | } 41 | } 42 | 43 | /// Return current batching size 44 | #[inline] 45 | pub fn size(&self) -> isize { 46 | self.cur_idx + 1 47 | } 48 | #[inline] 49 | pub fn is_empty(&self) -> bool { 50 | self.size() <= 0 51 | } 52 | #[inline] 53 | pub fn empty(&mut self) { 54 | self.cur_idx = -1; 55 | } 56 | #[inline] 57 | pub fn is_full(&self) -> bool { 58 | self.size() >= self.capacity as isize 59 | } 60 | 61 | /// Get the next 
doorbell entry 62 | /// # Return value 63 | /// - `true` means the doorbell batching size is less than `capacity`, it is ok to add a new doorbell 64 | /// - `false` means doorbell is full, cannot add new entry 65 | /// 66 | /// User shall check its return value 67 | #[inline] 68 | pub fn next(&mut self) -> bool { 69 | if self.is_full() { 70 | return false; 71 | } 72 | self.cur_idx += 1; 73 | true 74 | } 75 | } 76 | 77 | impl RecvDoorbell { 78 | /// Before flushing the doorbell, we must freeze it to prevent adding 79 | #[inline] 80 | pub fn freeze(&mut self) { 81 | assert!(!self.is_empty()); // should not be empty 82 | self.cur_wr().next = core::ptr::null_mut(); 83 | } 84 | 85 | /// After flushing the doorbell, unfreeze it 86 | #[inline] 87 | pub fn freeze_done(&mut self) { 88 | assert!(!self.is_empty()); 89 | if self.cur_idx == (self.capacity - 1) as isize { 90 | self.wrs[self.cur_idx as usize].next = &mut self.wrs[0] as *mut ib_recv_wr; 91 | } else { 92 | self.wrs[self.cur_idx as usize].next = 93 | &mut self.wrs[(self.cur_idx + 1) as usize] as *mut ib_recv_wr; 94 | } 95 | } 96 | 97 | #[inline] 98 | pub fn clear(&mut self) { 99 | self.freeze_done(); 100 | self.cur_idx = -1; 101 | } 102 | /// Return the ptr to current doorbell entry's wr 103 | #[inline] 104 | pub fn cur_wr(&mut self) -> &mut ibv_recv_wr { 105 | return if self.is_empty() { 106 | &mut self.wrs[0] 107 | } else { 108 | &mut self.wrs[self.cur_idx as usize] 109 | }; 110 | } 111 | /// Return the ptr to current doorbell entry's sge 112 | #[inline] 113 | pub fn cur_sge(&mut self) -> &mut ibv_sge { 114 | return if self.is_empty() { 115 | &mut self.sges[0] 116 | } else { 117 | &mut self.sges[self.cur_idx as usize] 118 | }; 119 | } 120 | 121 | #[inline] 122 | pub fn first_wr_ptr(&mut self) -> *mut ib_recv_wr { 123 | &mut self.wrs[0] as *mut ibv_recv_wr 124 | } 125 | /// Return the ptr to specified doorbell entry's wr 126 | ///**WRRN**: No check for idx. 
The caller has to take care of it by himself 127 | #[inline] 128 | pub fn get_wr_ptr(&mut self, idx: usize) -> *mut ibv_recv_wr { 129 | &mut self.wrs[idx] as *mut ibv_recv_wr 130 | } 131 | /// Return the ptr to specified doorbell entry's sge 132 | #[inline] 133 | pub fn get_sge_ptr(&mut self, idx: usize) -> *mut ibv_sge { 134 | &mut self.sges[idx] as *mut ibv_sge 135 | } 136 | } 137 | 138 | /* Maintain recv requests with a doorbell 139 | Capacity of the doorbell is designated by the `capacity` arg in RecvDoorbellHelper::create 140 | */ 141 | pub struct RecvDoorbellHelper { 142 | recv_doorbell: RecvDoorbell, 143 | recv_qp: Arc, 144 | } 145 | 146 | impl RecvDoorbellHelper { 147 | pub fn create(capacity: usize, qp: Arc ) -> Self { 148 | 149 | let mut ret = Self { 150 | recv_doorbell: RecvDoorbell::create(capacity), 151 | recv_qp: qp, 152 | }; 153 | ret.recv_doorbell.init(); 154 | ret 155 | } 156 | 157 | #[inline] 158 | pub fn sanity_check(&self) -> bool { 159 | let mut ret = true; 160 | for i in 0..self.recv_doorbell.capacity { 161 | let sge_ptr = & self.recv_doorbell.sges[i] as *const ibv_sge; 162 | let wr_sg_list = self.recv_doorbell.wrs[i].sg_list; 163 | ret = sge_ptr as u64 == wr_sg_list as u64; 164 | } 165 | ret 166 | } 167 | 168 | pub fn post_recv( 169 | &mut self, 170 | mr: &MemoryRegion, 171 | range: Range, 172 | wr_id: u64, 173 | ) -> Result<(), DatapathError> { 174 | self.recv_doorbell.next(); 175 | // setup sge fields 176 | self.recv_doorbell.cur_sge().addr = unsafe { mr.get_rdma_addr() + range.start }; 177 | self.recv_doorbell.cur_sge().length = range.size() as u32; 178 | self.recv_doorbell.cur_sge().lkey = mr.lkey().0; 179 | // setup recv wr fields 180 | self.recv_doorbell.cur_wr().wr_id = wr_id; 181 | 182 | let mut res = Ok(()); 183 | if self.recv_doorbell.is_full() { 184 | // println!("flush a recv doorbell"); 185 | self.recv_doorbell.freeze(); 186 | res = self.flush(); 187 | self.recv_doorbell.clear(); 188 | } 189 | res 190 | } 191 | 192 | #[inline] 
193 | pub fn flush(&mut self) -> Result<(), DatapathError> { 194 | self.recv_qp.post_recv_wr(self.recv_doorbell.first_wr_ptr()) 195 | } 196 | } -------------------------------------------------------------------------------- /benchs/rdma_util/src/recv_doorbell.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use KRdmaKit::{MemoryRegion, QueuePair, DatapathError}; 3 | use KRdmaKit::rdma_shim::bindings::*; 4 | use core::iter::TrustedRandomAccessNoCoerce; 5 | use core::ops::Range; 6 | 7 | pub(self) const DEFAULT_BATCH_SZ: usize = 64; 8 | 9 | pub const MAX_RECV_NUM: usize = 64; 10 | 11 | /* A structure to help recv doorbell. */ 12 | pub struct RecvDoorbell { 13 | /// contains recv wrs and sges to save requests 14 | pub wrs: [ibv_recv_wr; DEFAULT_BATCH_SZ], 15 | pub sges: [ibv_sge; DEFAULT_BATCH_SZ], 16 | 17 | pub cur_idx: isize, 18 | pub capacity: usize, 19 | } 20 | 21 | impl RecvDoorbell { 22 | pub fn create(capacity: usize) -> Self { 23 | let ret = Self { 24 | capacity, 25 | cur_idx: -1, 26 | wrs: [Default::default(); DEFAULT_BATCH_SZ], 27 | sges: [ibv_sge { 28 | addr: 0, 29 | length: 0, 30 | lkey: 0, 31 | }; DEFAULT_BATCH_SZ], 32 | }; 33 | ret 34 | } 35 | 36 | #[inline] 37 | pub fn init(&mut self) { 38 | for i in 0..self.capacity { 39 | self.wrs[i].num_sge = 1; 40 | self.wrs[i].next = &mut self.wrs[(i + 1) % self.capacity] as *mut ibv_recv_wr; 41 | self.wrs[i].sg_list = &mut self.sges[i] as *mut ibv_sge; 42 | } 43 | } 44 | 45 | /// Return current batching size 46 | #[inline] 47 | pub fn size(&self) -> isize { 48 | self.cur_idx + 1 49 | } 50 | #[inline] 51 | pub fn is_empty(&self) -> bool { 52 | self.size() <= 0 53 | } 54 | #[inline] 55 | pub fn empty(&mut self) { 56 | self.cur_idx = -1; 57 | } 58 | #[inline] 59 | pub fn is_full(&self) -> bool { 60 | self.size() >= self.capacity as isize 61 | } 62 | 63 | /// Get the next doorbell entry 64 | /// # Return value 65 | /// - `true` means the doorbell batching 
size is less than `capacity`, it is ok to add a new doorbell 66 | /// - `false` means doorbell is full, cannot add new entry 67 | /// 68 | /// User shall check its return value 69 | #[inline] 70 | pub fn next(&mut self) -> bool { 71 | if self.is_full() { 72 | return false; 73 | } 74 | self.cur_idx += 1; 75 | true 76 | } 77 | } 78 | 79 | impl RecvDoorbell { 80 | /// Before flushing the doorbell, we must freeze it to prevent adding 81 | #[inline] 82 | pub fn freeze(&mut self) { 83 | assert!(!self.is_empty()); // should not be empty 84 | self.cur_wr().next = core::ptr::null_mut(); 85 | } 86 | 87 | /// After flushing the doorbell, unfreeze it 88 | #[inline] 89 | pub fn freeze_done(&mut self) { 90 | assert!(!self.is_empty()); 91 | if self.cur_idx == (self.capacity - 1) as isize { 92 | self.wrs[self.cur_idx as usize].next = &mut self.wrs[0] as *mut ib_recv_wr; 93 | } else { 94 | self.wrs[self.cur_idx as usize].next = 95 | &mut self.wrs[(self.cur_idx + 1) as usize] as *mut ib_recv_wr; 96 | } 97 | } 98 | 99 | #[inline] 100 | pub fn clear(&mut self) { 101 | self.freeze_done(); 102 | self.cur_idx = -1; 103 | } 104 | /// Return the ptr to current doorbell entry's wr 105 | #[inline] 106 | pub fn cur_wr(&mut self) -> &mut ibv_recv_wr { 107 | return if self.is_empty() { 108 | &mut self.wrs[0] 109 | } else { 110 | &mut self.wrs[self.cur_idx as usize] 111 | }; 112 | } 113 | /// Return the ptr to current doorbell entry's sge 114 | #[inline] 115 | pub fn cur_sge(&mut self) -> &mut ibv_sge { 116 | return if self.is_empty() { 117 | &mut self.sges[0] 118 | } else { 119 | &mut self.sges[self.cur_idx as usize] 120 | }; 121 | } 122 | 123 | #[inline] 124 | pub fn first_wr_ptr(&mut self) -> *mut ib_recv_wr { 125 | &mut self.wrs[0] as *mut ibv_recv_wr 126 | } 127 | /// Return the ptr to specified doorbell entry's wr 128 | ///**WRRN**: No check for idx. 
The caller has to take care of it by himself 129 | #[inline] 130 | pub fn get_wr_ptr(&mut self, idx: usize) -> *mut ibv_recv_wr { 131 | &mut self.wrs[idx] as *mut ibv_recv_wr 132 | } 133 | /// Return the ptr to specified doorbell entry's sge 134 | #[inline] 135 | pub fn get_sge_ptr(&mut self, idx: usize) -> *mut ibv_sge { 136 | &mut self.sges[idx] as *mut ibv_sge 137 | } 138 | } 139 | 140 | /* Maintain recv requests with a doorbell 141 | Capacity of the doorbell is designated by the `capacity` arg in RecvDoorbellHelper::create 142 | */ 143 | pub struct RecvDoorbellHelper { 144 | recv_doorbell: RecvDoorbell, 145 | recv_qp: Arc, 146 | } 147 | 148 | impl RecvDoorbellHelper { 149 | pub fn create(capacity: usize, qp: Arc ) -> Self { 150 | 151 | let mut ret = Self { 152 | recv_doorbell: RecvDoorbell::create(capacity), 153 | recv_qp: qp, 154 | }; 155 | ret.recv_doorbell.init(); 156 | ret 157 | } 158 | 159 | #[inline] 160 | pub fn sanity_check(&self) -> bool { 161 | let mut ret = true; 162 | for i in 0..self.recv_doorbell.capacity { 163 | let sge_ptr = & self.recv_doorbell.sges[i] as *const ibv_sge; 164 | let wr_sg_list = self.recv_doorbell.wrs[i].sg_list; 165 | ret = sge_ptr as u64 == wr_sg_list as u64; 166 | } 167 | ret 168 | } 169 | 170 | pub fn post_recv( 171 | &mut self, 172 | mr: &MemoryRegion, 173 | range: Range, 174 | wr_id: u64, 175 | ) -> Result<(), DatapathError> { 176 | self.recv_doorbell.next(); 177 | // setup sge fields 178 | self.recv_doorbell.cur_sge().addr = unsafe { mr.get_rdma_addr() + range.start }; 179 | self.recv_doorbell.cur_sge().length = range.size() as u32; 180 | self.recv_doorbell.cur_sge().lkey = mr.lkey().0; 181 | // setup recv wr fields 182 | self.recv_doorbell.cur_wr().wr_id = wr_id; 183 | 184 | let mut res = Ok(()); 185 | if self.recv_doorbell.is_full() { 186 | // println!("flush a recv doorbell"); 187 | self.recv_doorbell.freeze(); 188 | res = self.flush(); 189 | self.recv_doorbell.clear(); 190 | } 191 | res 192 | } 193 | 194 | #[inline] 
195 | pub fn flush(&mut self) -> Result<(), DatapathError> { 196 | self.recv_qp.post_recv_wr(self.recv_doorbell.first_wr_ptr()) 197 | } 198 | } -------------------------------------------------------------------------------- /netbencher_core/src/reporter/mod.rs: -------------------------------------------------------------------------------- 1 | //! This module contains the reporter that is used to report the benchmark result. 2 | //! Generally, we provide two reporters. 3 | //! 4 | //! [`SimpleBenchReporter`] is a simple reporter that collects the results of workers on a list. 5 | //! [`DistributedBenchReporter`] will send the results to a remote reporter for aggregration. 6 | //! 7 | //! Example usage of [`SimpleBenchReporter`]: 8 | //! 9 | //! ``` 10 | //! #![feature(get_mut_unchecked)] 11 | 12 | //! extern crate netbencher_core; 13 | //! 14 | //! use std::sync::Arc; 15 | //! 16 | //! use netbencher_core::{BenchRunner, SimpleBenchReporter}; 17 | //! 18 | //! fn main() { 19 | //! let mut runner = BenchRunner::new(1); 20 | //! runner.run( 21 | //! // The evaluated function will increase the statics per second 22 | //! |worker_id, runner, mut stats, _| { 23 | //! println!("Worker {} started", worker_id); 24 | //! while runner.running() { 25 | //! std::thread::sleep(std::time::Duration::from_secs(1)); 26 | //! unsafe { Arc::get_mut_unchecked(&mut stats).finished_one_op() }; 27 | //! } 28 | //! }, 29 | //! (), 30 | //! ); 31 | //! 32 | //! let mut reporter = SimpleBenchReporter::new(); 33 | //! for _ in 0..1 { 34 | //! std::thread::sleep(std::time::Duration::from_secs(1)); 35 | //! let stat = runner.report(&mut reporter); 36 | //! println!("Results: {}", stat); 37 | //! 38 | //! } 39 | //! 40 | //! runner.stop().unwrap(); 41 | //! } 42 | //! 43 | //! ``` 44 | //! 
45 | use std::ops; 46 | use std::sync::Arc; 47 | 48 | use serde_derive::{Deserialize, Serialize}; 49 | 50 | mod simple_reporter; 51 | pub use simple_reporter::SimpleBenchReporter; 52 | 53 | mod coordinated_reporter; 54 | pub use coordinated_reporter::{CoordinatedReporter, CoordinatedReporterMaster}; 55 | 56 | 57 | 58 | #[derive(Clone, Copy, Debug, PartialEq)] 59 | struct AvgRdtsc { 60 | pub value: u64, 61 | pub cnt_num: i64, 62 | } 63 | 64 | /// BenchStat is a single stat that is reported by a worker 65 | /// It records the following things: 66 | /// > 1. num ops finished during this period 67 | /// > 2. latency of each op 68 | /// etc. 69 | #[derive(Clone, Copy, Debug, PartialEq)] 70 | #[repr(align(128))] 71 | pub struct BenchStat { 72 | /// The number of ops finished during this period 73 | pub num_ops_finished: u64, 74 | 75 | /// The average rdtsc value of ops reported 76 | avg_rdtsc: AvgRdtsc, 77 | } 78 | 79 | impl BenchStat { 80 | /// Reset the stat 81 | pub fn reset(&mut self) { 82 | self.num_ops_finished = 0; 83 | self.avg_rdtsc = AvgRdtsc {value: 0, cnt_num: 0}; 84 | } 85 | 86 | /// Mark the stat that one op is finished 87 | pub fn finished_one_op(&mut self) { 88 | self.finished_batch_ops(1); 89 | } 90 | 91 | /// Mark the stat that a batch of ops are finished 92 | pub fn finished_batch_ops(&mut self, num_ops: u64) { 93 | self.num_ops_finished += num_ops; 94 | } 95 | 96 | /// Record the average rdtsc for each op 97 | pub fn record_avg_rdtsc(&mut self, num: i64) { 98 | self.avg_rdtsc.cnt_num += 1; 99 | self.avg_rdtsc.value += 100 | ((num - self.avg_rdtsc.value as i64) /self.avg_rdtsc.cnt_num) as u64; 101 | } 102 | } 103 | 104 | impl Default for BenchStat { 105 | fn default() -> Self { 106 | Self { 107 | num_ops_finished: 0, 108 | avg_rdtsc: AvgRdtsc {value: 0, cnt_num: 0}, 109 | } 110 | } 111 | } 112 | 113 | /// A collection of BenchStat to transform it to a user-readable format 114 | /// Basically, we care about the following stuffs: 115 | /// 1. 
throughput 116 | /// 2. average latency 117 | /// 3. 99th latency (TBD) 118 | #[derive(Debug, PartialEq, Clone, Copy, Serialize, Deserialize)] 119 | pub struct CollectedBenchStat { 120 | /// The number of ops finished during a period 121 | pub throughput: f64, 122 | /// The average latency of all ops 123 | pub avg_latency: f64, 124 | /// The 99th latency of all ops 125 | pub p99_latency: f64, 126 | 127 | /// The id of the stats 128 | pub id: usize, 129 | } 130 | 131 | impl Default for CollectedBenchStat { 132 | fn default() -> Self { 133 | Self { 134 | throughput: 0.0, 135 | avg_latency: 0.0, 136 | p99_latency: 0.0, 137 | id: 0, 138 | } 139 | } 140 | } 141 | 142 | impl CollectedBenchStat { 143 | /// Reset the stat 144 | pub fn reset(&mut self) { 145 | self.throughput = 0.0; 146 | self.avg_latency = 0.0; 147 | self.p99_latency = 0.0; 148 | } 149 | } 150 | 151 | /// BenchReporter is a trait that defines how to report stats collected. 152 | pub trait BenchReporter { 153 | /// Collect the results from the list of BenchStats and collect it to a CollectedBenchStat, 154 | /// which is a user-readable format. 155 | fn report_collected_stat(&mut self, stats: &Vec>) -> CollectedBenchStat; 156 | } 157 | 158 | /// AsyncBenchReporter is a trait that defines how to report stats collected. 159 | /// The only difference with [`BenchmarkReporter`] is that it is async. 160 | pub trait AsyncBenchReporter { 161 | /// Collect the results from the list of BenchStats and collect it to a CollectedBenchStat, 162 | /// which is a user-readable format (async version). 
163 | async fn async_report_collect_stat(&mut self, stats: &Vec>) -> CollectedBenchStat; 164 | } 165 | 166 | impl ops::Add for CollectedBenchStat { 167 | type Output = Self; 168 | 169 | fn add(self, other: Self) -> Self { 170 | // FIXME: the latency calculation is not so properly here 171 | Self { 172 | throughput: self.throughput + other.throughput, 173 | avg_latency: (self.avg_latency + other.avg_latency) / 2.0, 174 | p99_latency: (self.p99_latency + other.p99_latency) / 2.0, 175 | id: self.id, 176 | } 177 | } 178 | } 179 | 180 | impl ops::Add for BenchStat { 181 | type Output = Self; 182 | 183 | fn add(self, other: Self) -> Self { 184 | Self { 185 | num_ops_finished: self.num_ops_finished + other.num_ops_finished, 186 | avg_rdtsc: AvgRdtsc {value: 0, cnt_num: 0}, 187 | } 188 | } 189 | } 190 | 191 | impl ops::Sub for BenchStat { 192 | type Output = Self; 193 | 194 | fn sub(self, other: Self) -> Self { 195 | Self { 196 | num_ops_finished: self.num_ops_finished - other.num_ops_finished, 197 | avg_rdtsc: AvgRdtsc {value: 0, cnt_num: 0}, 198 | } 199 | } 200 | } 201 | 202 | impl std::fmt::Display for CollectedBenchStat { 203 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 204 | write!( 205 | f, 206 | "@{} Throughput: {:.4} Mops/s, Avg Latency: {:.2} µs", 207 | self.id, self.throughput, self.avg_latency 208 | ) 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /benchs/bench_util/src/args.rs: -------------------------------------------------------------------------------- 1 | use std::sync::{ Arc }; 2 | use std::net::SocketAddr; 3 | 4 | use clap::{ Command, arg, Parser }; 5 | 6 | use KRdmaKit::{ MemoryRegion, QueuePair, QueuePairBuilder, QueuePairStatus, UDriver, DatapathError }; 7 | use KRdmaKit::services_user::MRInfo; 8 | 9 | use rand::RngCore; 10 | use rand_chacha::ChaCha8Rng; 11 | 12 | use crate::CACHE_LINE_SZ; 13 | use crate::round_up; 14 | 15 | #[derive(Debug, Parser)] 16 | pub struct 
CmdlineArgs {
    /* Common fields of client and server */

    /// Number of NIC devices used
    #[arg(long, default_value_t = 1)]
    pub nic_num: usize,

    /// Index of the first NIC device used
    #[arg(long, default_value_t = 0)]
    pub nic_idx: usize,

    /// Memory region bytes of the server
    #[arg(long, default_value_t = 10240)]
    pub random_space: u64,

    /// The life of the bench (seconds)
    #[arg(long, default_value_t = 15)]
    pub life: u32,

    /// The listening address of server
    #[arg(long, default_value_t = String::from("127.0.0.1:8888"))]
    pub listen_addr: String,

    /// The reporting address of server
    #[arg(long, default_value_t = String::from("127.0.0.1:10001"))]
    pub report_addr: String,

    /* Client-specific fields */

    /// Number of threads used
    #[arg(short, long, default_value_t = 1)]
    pub threads: u64,

    /// Number of requests in a batch
    #[arg(short, long, default_value_t = 64)]
    pub factor: u64,

    /// Payload of each request
    #[arg(short, long, default_value_t = 64)]
    pub payload: u64,

    /// Client-local memory region bytes
    #[arg(long, default_value_t = 4096)]
    pub local_mr: u64,

    /// Whether to run READ bench
    #[arg(long)]
    pub read: bool,

    /// Whether to separate thread access area
    #[arg(long)]
    pub fixed: bool,

    /// Client id, which will be used to generate unique seed
    #[arg(long, default_value_t = 0)]
    pub client_id: u64,

    /// The random access area bytes
    #[arg(long, default_value_t = 1024)]
    pub thread_gap: u64,

    /// Whether to run latency test
    #[arg(long)]
    pub latency_test: bool,

    /// One signal in requests
    #[arg(long, default_value_t = 16)]
    pub signal_size: usize,

    /// One doorbell for requests
    #[arg(long, default_value_t = 16)]
    pub db_size: usize,

    ///
Whether to report to 89 | #[arg(long)] 90 | pub report: bool, 91 | 92 | /// Whether to generate a signal at client for every request 93 | #[arg(long)] 94 | pub signaled: bool, 95 | 96 | /// Whether to apply doorbell batching 97 | #[arg(long)] 98 | pub doorbell: bool, 99 | 100 | /// Whether to profile the posting cost 101 | #[arg(long)] 102 | pub profile: bool, 103 | /* Server-specific fields */ 104 | 105 | /// Whether to run the bench in server mode 106 | #[arg(long)] 107 | pub server: bool, 108 | 109 | /// Whether to allcate memory regions using huge pages 110 | #[arg(long)] 111 | pub huge_page: bool, 112 | } 113 | 114 | impl Clone for CmdlineArgs { 115 | fn clone(&self) -> Self { 116 | Self { 117 | listen_addr: self.listen_addr.clone(), 118 | report_addr: self.report_addr.clone(), 119 | ..*self 120 | } 121 | } 122 | } 123 | 124 | impl CmdlineArgs { 125 | /// coordinate the arguments to make them be compatible to each other 126 | pub fn coordinate(&mut self) { 127 | self.local_mr = std::cmp::max(self.factor * self.payload, self.local_mr); 128 | self.thread_gap = std::cmp::max(self.payload, self.thread_gap); 129 | self.random_space = std::cmp::max(self.payload, self.random_space); 130 | self.random_space = std::cmp::max(self.threads * self.thread_gap, self.random_space); 131 | } 132 | 133 | pub fn create_rc(&self, thread_id: usize) -> Result<(Arc, Arc, MRInfo), ()> { 134 | let addr: SocketAddr = self.listen_addr.parse().unwrap(); 135 | let client_port: u8 = 1; 136 | let ctx = UDriver::create() 137 | .expect("failed to query device") 138 | .devices() 139 | .get((thread_id % self.nic_num) + self.nic_idx) 140 | .expect("no rdma device available") 141 | .open_context() 142 | .expect("failed to create RDMA context"); 143 | let mut builder = QueuePairBuilder::new(&ctx); 144 | builder.allow_remote_rw().allow_remote_atomic().set_port_num(client_port); 145 | let qp = builder.build_rc().expect("failed to create the client QP"); 146 | let qp = 
qp.handshake(addr).expect("Handshake failed!"); 147 | let a = qp.status().expect("Query status failed!"); 148 | match a { 149 | QueuePairStatus::ReadyToSend => {} 150 | _ => { 151 | return Err(()); 152 | } 153 | } 154 | let mr_infos = qp.query_mr_info().expect("Failed to query MR info"); 155 | let mr_metadata = mr_infos.inner().get("MR").expect("Unregistered MR"); 156 | let client_mr = match self.huge_page { 157 | true => { 158 | Arc::new(MemoryRegion::new_huge_page(ctx.clone(), self.local_mr as _).expect( 159 | "Failed to allocate hugepage MR for send buffer" 160 | )) 161 | }, 162 | false => { 163 | Arc::new(MemoryRegion::new(ctx.clone(), self.local_mr as _).expect( 164 | "Failed to allocate MR" 165 | )) 166 | } 167 | }; 168 | 169 | 170 | let mr_buf = client_mr.get_virt_addr() as *mut u64; 171 | unsafe { 172 | *mr_buf = 0; 173 | } 174 | Ok((qp, client_mr, MRInfo {addr: mr_metadata.addr, capacity: mr_metadata.capacity, rkey: mr_metadata.rkey})) 175 | } 176 | 177 | pub fn get_next_index(&self, thread_idx: usize, rand: &mut ChaCha8Rng) -> u64 { 178 | let mut r = rand.next_u64(); 179 | 180 | if self.payload == self.random_space { 181 | return 0; 182 | } 183 | 184 | if self.fixed { 185 | if self.thread_gap != 0 { 186 | // r = (thread_idx * 64) as _; 187 | assert!(self.thread_gap >= self.payload); 188 | assert!(self.threads * self.thread_gap <= self.random_space); 189 | r = (r % self.thread_gap) + (thread_idx as u64) * self.thread_gap; 190 | } else { 191 | r = 0; 192 | } 193 | } 194 | 195 | // align 196 | r = round_up(r, CACHE_LINE_SZ as i64); 197 | assert_eq!(r % CACHE_LINE_SZ, 0); 198 | 199 | let index = (r % (self.random_space - self.payload)) as u64; 200 | index 201 | } 202 | } -------------------------------------------------------------------------------- /netbencher_core/src/reporter/coordinated_reporter.rs: -------------------------------------------------------------------------------- 1 | //! 
A reporter that reports the throughput and latency of workers from this machine. 2 | //! This contains two parts: a master and several reporters. 3 | //! The master collects the reports from the reporters and aggregates them accordingly, see `report_event_loop`. 4 | //! 5 | //! To use this module, first start the master at one node, then start the reporters at other nodes. 6 | //! More specifically, see the following master example: 7 | //! 8 | //! ```ignore 9 | //! let mut master = CoordinatedReporterMaster::new( 10 | //! "127.0.0.1:8888".parse().unwrap(), 11 | //! ) 12 | //! .await 13 | //! .expect("failed to create the master"); 14 | //! 15 | //! master 16 | //! .report_event_loop( 17 | //! Duration::from_secs(10), // run 10 seconds 18 | //! Duration::from_secs(1), // report every 1 seconds 19 | //! ) 20 | //! .await 21 | //! .expect("Event loop report error"); 22 | //! ``` 23 | //! 24 | //! Then, start any reporters at other nodes, for example: 25 | //! 26 | //! ```ignore 27 | //! let bench = BenchRunner::new(1); // a bench runner w/ 1 threads 28 | //! 29 | //! let inner_reporter = SimpleBenchReporter::new_with_id(0); // can be any reporter 30 | //! let mut reporter = CoordinatedReporter::new( 31 | //! "127.0.0.1:8888", inner_reporter).await.expect("failed to create the reporter"); 32 | //! 33 | //! // send a report to the master 34 | //! bench.async_report(&mut reporter).await; 35 | //! 36 | //! ``` 37 | //! 38 | //! More example can be found at `netbencher-core/examples/coordinator_report_worker.rs` and `netbencher-core/examples/coordinator_report_master.rs`. 39 | //! 40 | use std::net::SocketAddr; 41 | use std::sync::Arc; 42 | use std::time::Duration; 43 | use std::time::Instant; 44 | 45 | use tokio::net::UdpSocket; 46 | 47 | use crate::AsyncBenchReporter; 48 | 49 | use super::{BenchReporter, BenchStat, CollectedBenchStat}; 50 | 51 | use log::{info}; 52 | 53 | /// A coordinator that collects reports from [`CoordinatedReporter`]s. 
///
/// # Note
/// We assume that `CoordinatedReporter`s IDs are continuous and start from 0.
///
pub struct CoordinatedReporterMaster {
    // Latest report received from each reporter, indexed by reporter id.
    num_reports: Vec<CollectedBenchStat>,
    // When each reporter's latest report arrived (used to drop stale ones).
    record_time: Vec<Instant>,
    master_socket: UdpSocket,
}

impl CoordinatedReporterMaster {
    /// Create a new coordinated reporter master (async version) listening on `sock`
    pub async fn new(num_reporters: usize, sock: SocketAddr) -> std::io::Result<Self> {
        let master_socket = UdpSocket::bind(sock).await?;
        let mut num_reports = Vec::with_capacity(num_reporters);
        let mut record_time = Vec::with_capacity(num_reporters);

        let now = Instant::now();
        for _ in 0..num_reporters {
            num_reports.push(Default::default());
            record_time.push(now);
        }

        Ok(Self {
            num_reports,
            record_time,
            master_socket,
        })
    }

    /// Run an event loop for `duration`, collecting reports from the
    /// reporters and logging an aggregate every `report_duration`.
    ///
    /// NOTE(review): this loop busy-polls `try_recv_from` without ever
    /// yielding to the runtime, so it pins one CPU core for the whole
    /// `duration` — confirm this is intentional before reusing it elsewhere.
    pub async fn report_event_loop(
        &mut self,
        duration: Duration,
        report_duration: Duration,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let start_time = Instant::now();
        let mut tick_time = Instant::now();
        let mut buf = [0u8; 1024];

        let mut cur_time = Instant::now();

        while cur_time.duration_since(start_time) <= duration {
            // try to receive one report without blocking
            match self.master_socket.try_recv_from(&mut buf) {
                Ok((n, _addr)) => {
                    let stat: CollectedBenchStat = serde_json::from_slice(&buf[..n])?;
                    self.num_reports[stat.id] = stat;
                    self.record_time[stat.id] = cur_time;
                }
                Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => {
                    // nothing pending; fall through to the tick check
                }
                Err(e) => {
                    return Err(Box::new(e));
                }
            }

            if cur_time.duration_since(tick_time) >= report_duration {
                let res = self.aggregrate_stats(&cur_time);
                info!("reports: {}", res);
                tick_time = Instant::now();
            }
            cur_time = Instant::now();
        }
        Ok(())
    }

    /// Sum up all reports that are fresh (received within the last 1.5 s);
    /// stale reporters are silently excluded from the aggregate.
    fn aggregrate_stats(&self, cur_time: &Instant) -> CollectedBenchStat {
        let mut res = CollectedBenchStat::default();
        for i in 0..self.num_reports.len() {
            // we will filter out outdated reports
            if cur_time.duration_since(self.record_time[i]) <= Duration::from_millis(1500) {
                res = res + self.num_reports[i];
            }
        }
        res
    }
}

/// A reporter that reports the throughput and latency of workers from this machine.
pub struct CoordinatedReporter<R> {
    // We leverage the inner reporter to collect stats
    inner: R,
    master_addr: SocketAddr,
    master_socket: UdpSocket,
}

impl<R> CoordinatedReporter<R>
where
    R: BenchReporter,
{
    /// Create a new coordinated reporter (async version) using a known reporter
    pub async fn new(master_addr: SocketAddr, reporter: R) -> std::io::Result<Self> {
        let master_socket = UdpSocket::bind("0.0.0.0:0").await?;
        Ok(Self {
            inner: reporter,
            master_addr,
            master_socket,
        })
    }
}

impl<R> BenchReporter for CoordinatedReporter<R>
where
    R: BenchReporter,
{
    // 1. collect the stats
    // 2.
send the stats to the master 163 | fn report_collected_stat(&mut self, stats: &Vec>) -> CollectedBenchStat { 164 | self.inner.report_collected_stat(stats) 165 | } 166 | } 167 | 168 | impl AsyncBenchReporter for CoordinatedReporter 169 | where 170 | R: BenchReporter, 171 | { 172 | async fn async_report_collect_stat( 173 | &mut self, 174 | stats: &Vec>, 175 | ) -> CollectedBenchStat { 176 | let res = self.inner.report_collected_stat(stats); 177 | self.master_socket 178 | .send_to( 179 | serde_json::to_vec(&res).unwrap().as_slice(), 180 | self.master_addr, 181 | ) 182 | .await 183 | .expect("send UDP message to master failed"); 184 | res 185 | } 186 | } 187 | 188 | impl std::fmt::Display for CoordinatedReporter 189 | where 190 | R: BenchReporter, 191 | { 192 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 193 | write!(f, "CoordinatedReporter: report to {}", self.master_addr) 194 | } 195 | } 196 | 197 | mod tests { 198 | 199 | #[test] 200 | fn test_coordinated_reporter_create() { 201 | use super::*; 202 | use crate::SimpleBenchReporter; 203 | use tokio::runtime::Runtime; 204 | 205 | let rt = Runtime::new().unwrap(); 206 | rt.block_on(async { 207 | let r = SimpleBenchReporter::new_with_id(0); 208 | // the addr is not important here, can be any addr 209 | let _ = CoordinatedReporter::new("127.0.0.1:8080".parse().unwrap(), r); 210 | }); 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /netbencher_core/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(get_mut_unchecked, async_fn_in_trait)] 2 | //! Core code for bootstrap the benchmark 3 | //! 4 | //! The code represents a client process running on a client machine. 5 | //! We use a framework to simplfiy bootstraping the tests. 6 | //! 7 | //! The framework ([`BenchRunner`]) will bootstrap a set of (pre-defined) threads to run user-specificed thread body functions. 8 | //! 
The function is expceted to record some statistics ([`BenchStat`])]), 9 | //! and the framework will automatically collect and report these numbers ([`CollectedBenchStat`]). 10 | //! 11 | //! The [`BenchReporter`] trait will implement various strategies to report the statistics, 12 | //! e.g. print to stdout, write to a file, or send to a remote reporter. 13 | //! Currently, we provide two reporter implementation: 14 | //! - [`SimpleReporter`] will collect and print stats from threads on one server. 15 | //! - [`CoordinatedReporter`] will send the stats to a remote reporter for aggregation. 16 | //! 17 | //! The simplest way is to use the [`BenchRunner::run`] function to run a function on `num_workers` threads: 18 | //! 19 | //! ```no_run 20 | //! use netbencher_core::BenchRunner; 21 | //! 22 | //! let mut runner = BenchRunner::new(4); 23 | //! runner.run(|thread_id, runner, stat, input| { 24 | //! // do something 25 | //! // mark the stats 26 | //! 1 // return the result 27 | //! }, 0); 28 | //! runner.stop(); 29 | //! 30 | //! ``` 31 | //! 32 | //! One can also use a reporter to report the current states of the runner: 33 | //! 34 | //! ```no_run 35 | //! use netbencher_core::BenchRunner; 36 | //! use netbencher_core::SimpleBenchReporter; 37 | //! 38 | //! let mut runner = BenchRunner::new(4); 39 | //! runner.run(|thread_id, runner, stat, input| { 40 | //! while runner.running() { 41 | //! // do something 42 | //! // mark the stats 43 | //! } 44 | //! 1 // return the result 45 | //! }, 0); 46 | //! 47 | //! let mut reporter = SimpleBenchReporter::new(); 48 | //! for _ in 0..10 { 49 | //! std::thread::sleep(std::time::Duration::from_secs(1)); 50 | //! let stat = runner.report(&mut reporter); 51 | //! println!("Results: {}", stat); 52 | //! } 53 | //! 54 | //! runner.stop(); 55 | //! ``` 56 | //! 57 | //! 58 | //! For more exmaples, please refer to the examples folder. 59 | //! 
60 | 61 | #![deny(missing_docs)] 62 | 63 | use std::sync::atomic::{AtomicBool, Ordering}; 64 | use std::sync::Arc; 65 | use std::thread::JoinHandle; 66 | 67 | /// Reporter will implement the BenchReporter related modules 68 | mod reporter; 69 | pub use reporter::{ 70 | AsyncBenchReporter, BenchReporter, BenchStat, CollectedBenchStat, CoordinatedReporter, 71 | CoordinatedReporterMaster, SimpleBenchReporter, 72 | }; 73 | 74 | /// Global control data structure to manage the bench workers 75 | /// T : the return type of the worker 76 | /// 77 | pub struct BenchRunner { 78 | handlers: Vec>, 79 | worker_stats: Vec>, 80 | num_workers: usize, 81 | running: AtomicBool, 82 | } 83 | 84 | impl BenchRunner { 85 | /// Create a new bench runner with a given number of workers 86 | pub fn new(num_workers: usize) -> Arc { 87 | Arc::new(Self { 88 | handlers: Vec::new(), 89 | worker_stats: Vec::new(), 90 | num_workers, 91 | running: AtomicBool::new(true), 92 | }) 93 | } 94 | 95 | /// Run a given function on each worker 96 | /// 97 | /// The passsed in function is expected to take the signature of the following: 98 | /// 99 | /// fn worker(thread_id : usize, runner : Arc >, stat : Arc, input : Input) -> T 100 | /// 101 | pub fn run(self: &mut Arc, func: F, input: Input) 102 | where 103 | F: FnOnce(usize, Arc, Arc, Input) -> T + Send + 'static + Clone, 104 | T: Send + 'static + Sync + Copy, 105 | Input: Send + 'static + Sync + Clone, 106 | { 107 | let runner = self.clone(); 108 | let self_mut = unsafe { Arc::get_mut_unchecked(self) }; 109 | 110 | for i in 0..runner.num_workers { 111 | let inner_runner = runner.clone(); 112 | let stat: Arc = Arc::new(Default::default()); 113 | self_mut.worker_stats.push(stat.clone()); 114 | let input_args = input.clone(); 115 | let func = func.clone(); 116 | let handler = std::thread::spawn(move || func(i, inner_runner, stat, input_args)); 117 | self_mut.handlers.push(handler); 118 | } 119 | } 120 | 121 | /// Stop all the workers 122 | pub fn stop(self: 
&mut Arc) -> std::thread::Result> { 123 | let mut res = Vec::new(); 124 | 125 | let self_mut = unsafe { Arc::get_mut_unchecked(self) }; 126 | self_mut.running.store(false, Ordering::SeqCst); 127 | 128 | while !self_mut.handlers.is_empty() { 129 | res.push(self_mut.handlers.pop().unwrap().join()?); 130 | } 131 | 132 | Ok(res) 133 | } 134 | 135 | /// Report the collected stats from the managed workers 136 | pub fn report(self: &Self, reporter: &mut dyn BenchReporter) -> CollectedBenchStat { 137 | reporter.report_collected_stat(&self.worker_stats) 138 | } 139 | 140 | /// Report the collected stats from the managed workers (async version) 141 | /// Due to compile problem of async trait, currently I will fix the reporter to be CoordinatedReporter 142 | pub async fn report_async( 143 | self: &Self, 144 | reporter: &mut CoordinatedReporter, 145 | ) -> CollectedBenchStat { 146 | reporter.async_report_collect_stat(&self.worker_stats).await 147 | } 148 | 149 | /// Check if the runner is still running 150 | #[inline] 151 | pub fn running(&self) -> bool { 152 | self.running.load(Ordering::Relaxed) 153 | } 154 | } 155 | 156 | mod tests { 157 | #[test] 158 | fn test_simple_runner() { 159 | let mut runner = super::BenchRunner::new(2); 160 | runner.run( 161 | |_, r, _, _| { 162 | while r.running() { 163 | println!("Hello world"); 164 | } 165 | }, 166 | (), 167 | ); 168 | runner.stop().unwrap(); 169 | } 170 | 171 | #[test] 172 | fn test_runner_stop() { 173 | let mut runner = super::BenchRunner::new(2); 174 | runner.run( 175 | |_, r, _, _| { 176 | println!("Hello world"); 177 | while r.running() { 178 | std::thread::yield_now(); 179 | } 180 | }, 181 | (), 182 | ); 183 | 184 | runner.stop().unwrap(); 185 | } 186 | 187 | #[test] 188 | fn test_runner_input_work() { 189 | let mut runner = super::BenchRunner::new(2); 190 | let input: usize = 73; 191 | 192 | runner.run( 193 | |_, r, _, input| { 194 | println!("Hello world"); 195 | while r.running() { 196 | std::thread::yield_now(); 197 | 
} 198 | assert_eq!(input, 73); 199 | }, 200 | input, 201 | ); 202 | 203 | runner.stop().unwrap(); 204 | } 205 | 206 | #[test] 207 | fn test_runner_output_work() { 208 | let mut runner = super::BenchRunner::new(10); 209 | let input: usize = 73; 210 | 211 | runner.run( 212 | |thread_id, r, _, input| { 213 | println!("Hello world"); 214 | while r.running() { 215 | std::thread::yield_now(); 216 | } 217 | assert_eq!(input, 73); 218 | thread_id 219 | }, 220 | input, 221 | ); 222 | 223 | let res = runner.stop().unwrap(); 224 | 225 | let mut sum = 0; 226 | for i in res { 227 | sum += i; 228 | } 229 | assert_eq!(sum, 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9); 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /benchs/bench_util/src/ud_endpoint.rs: -------------------------------------------------------------------------------- 1 | use serde_derive::{Deserialize, Serialize}; 2 | use std::io::{Read, Write}; 3 | use std::borrow::Borrow; 4 | use std::net::{SocketAddr, TcpStream}; 5 | use std::sync::Arc; 6 | use KRdmaKit::{DatagramEndpoint, QueuePair, QueuePairBuilder, UDriver}; 7 | use KRdmaKit::rdma_shim::bindings::*; 8 | use log::*; 9 | 10 | use crate::MAX_MSG_SZ; 11 | 12 | 13 | pub const TERMINATE_SIG: usize = 1; 14 | 15 | #[derive(Clone, Serialize, Deserialize, Debug)] 16 | pub struct UdMetaRaw { 17 | pub gid: ibv_gid_wrapper, 18 | pub lid: u32, 19 | pub qpn: u32, 20 | pub qkey: u32, 21 | } 22 | 23 | #[derive(Clone)] 24 | pub struct UdMeta { 25 | pub gid: ib_gid, 26 | pub lid: u32, 27 | pub qpn: u32, 28 | pub qkey: u32, 29 | } 30 | #[derive(Clone, Serialize, Deserialize, Debug)] 31 | pub struct UdMetaBatch { 32 | pub a: Vec, 33 | pub b: u32, 34 | } 35 | 36 | #[repr(C)] 37 | #[allow(non_camel_case_types)] 38 | #[derive(Copy, Clone, Serialize, Deserialize, Debug)] 39 | pub struct ibv_gid_wrapper { 40 | raw: [u64; 2usize], 41 | } 42 | 43 | impl From for ibv_gid_wrapper { 44 | fn from(gid: ib_gid) -> Self { 45 | Self { 46 | raw: 
gid.bindgen_union_field, 47 | } 48 | } 49 | } 50 | 51 | impl Into for ibv_gid_wrapper { 52 | fn into(self) -> ib_gid { 53 | ib_gid { 54 | bindgen_union_field: self.raw, 55 | ..Default::default() 56 | } 57 | } 58 | } 59 | 60 | pub fn marshal(msg: UdMeta) -> Vec { 61 | let data = UdMetaRaw { 62 | gid: ibv_gid_wrapper::from(msg.gid), 63 | lid: msg.lid, 64 | qpn: msg.qpn, 65 | qkey: msg.qkey, 66 | }; 67 | serde_json::to_vec(&data).unwrap() 68 | } 69 | 70 | pub fn unmarshal(raw: &[u8]) -> UdMeta { 71 | let msg: UdMetaRaw = serde_json::from_slice(raw).unwrap(); 72 | UdMeta { 73 | gid: msg.gid.into(), 74 | lid: msg.lid, 75 | qpn: msg.qpn, 76 | qkey: msg.qkey, 77 | } 78 | } 79 | 80 | pub fn marshal_batch(msg: Vec, msg_id: u32) -> Vec { 81 | let data: Vec = msg 82 | .into_iter() 83 | .map(|msg| UdMetaRaw { 84 | gid: ibv_gid_wrapper::from(msg.gid), 85 | lid: msg.lid, 86 | qpn: msg.qpn, 87 | qkey: msg.qkey, 88 | }) 89 | .collect(); 90 | let data = UdMetaBatch { a: data, b: msg_id }; 91 | serde_json::to_vec(&data).unwrap() 92 | } 93 | 94 | pub fn unmarshal_batch(raw: &[u8]) -> (Vec, u32) { 95 | let msg: UdMetaBatch = serde_json::from_slice(raw).unwrap(); 96 | ( 97 | msg.a 98 | .into_iter() 99 | .map(|msg| UdMeta { 100 | gid: msg.gid.into(), 101 | lid: msg.lid, 102 | qpn: msg.qpn, 103 | qkey: msg.qkey, 104 | }) 105 | .collect(), 106 | msg.b, 107 | ) 108 | } 109 | 110 | pub fn bootstrap_uds( 111 | socket: &mut TcpStream, 112 | nic_idx: usize, 113 | nic_num: usize, 114 | threads: usize, 115 | client_id: u64, 116 | ) -> (Vec>, Vec>) { 117 | let mut client_qps = Vec::new(); 118 | let mut client_raw_eps = Vec::new(); 119 | let mut server_datagram_eps = Vec::new(); 120 | 121 | // create one ud qp for each thread 122 | for tid in 0..threads { 123 | let ctx = UDriver::create() 124 | .expect("failed to query device") 125 | .devices() 126 | .get(nic_idx + (tid % nic_num)) 127 | .expect("no rdma device available") 128 | .open_context() 129 | .expect("failed to create RDMA context"); 130 | 
let client_qp = QueuePairBuilder::new(&ctx) 131 | .build_ud() 132 | .expect("fail to build ud qp") 133 | .bring_up_ud() 134 | .expect("fail to bring up ud qp"); 135 | let client_ep = UdMeta { 136 | gid: client_qp.gid().unwrap(), 137 | lid: client_qp.lid().unwrap(), 138 | qpn: client_qp.qp_num(), 139 | qkey: client_qp.qkey(), 140 | }; 141 | client_qps.push(client_qp); 142 | client_raw_eps.push(client_ep); 143 | } 144 | 145 | // exchange endpoints message to build ud connection 146 | let client_raw_eps_msg = marshal_batch(client_raw_eps, client_id as u32); 147 | let mut msg_buf = [0; MAX_MSG_SZ as usize]; 148 | let byte_send = socket.write(client_raw_eps_msg.as_slice()).unwrap(); 149 | let byte_recv = socket.read(&mut msg_buf).unwrap(); 150 | debug!("[CONN] send {} bytes, recv {} bytes", byte_send, byte_recv); 151 | 152 | let (server_raw_eps, _) = unmarshal_batch(&msg_buf[0..byte_recv]); 153 | let srv_threads = server_raw_eps.len(); 154 | 155 | /* Create server endpoints for each client thread 156 | Notice: in case `threads != srv_threads` 157 | - Create the first `SRV_THREADS` endpoints for each client thread, 158 | then create the remaining according to them 159 | - The mapping between client and server is: 160 | client_thread % srv_threads => server_thread 161 | */ 162 | for (id, server_raw_ep) in server_raw_eps.into_iter().enumerate() { 163 | let ctx = client_qps[id].ctx(); 164 | server_datagram_eps.push(Arc::new( 165 | DatagramEndpoint::new( 166 | &ctx,1, 167 | server_raw_ep.lid, 168 | server_raw_ep.gid, 169 | server_raw_ep.qpn, 170 | server_raw_ep.qkey, 171 | ).unwrap(), 172 | )); 173 | } 174 | for id in srv_threads..(threads as _) { 175 | debug!("client {}->server {}", id, id % srv_threads); 176 | let ctx = client_qps[id].ctx(); 177 | let server_ep: &Arc = server_datagram_eps 178 | .get(id % srv_threads).unwrap().borrow(); 179 | server_datagram_eps.push(Arc::new( 180 | DatagramEndpoint::new( 181 | &ctx,1, 182 | server_ep.lid(), 183 | server_ep.gid(), 184 | 
server_ep.qpn(),
                server_ep.qkey(),
            ).unwrap(),
        ));
    }
    (client_qps, server_datagram_eps)
}

/// Notify the server that this client is finished by sending a single
/// termination byte over the bootstrap TCP socket.
///
/// Panics if the socket write fails or does not transfer exactly
/// `TERMINATE_SIG` (== 1) bytes.
pub fn terminate_server(
    socket: &mut TcpStream
) {
    // `Write::write` only needs a shared borrow of the buffer; the previous
    // code passed `&mut msg` (and declared the array `mut`) for no reason.
    let msg: [u8; 1] = [0];
    let byte_send = socket.write(&msg).unwrap();
    debug!("Send a {}-byte termination message to server.", byte_send);
    assert!(byte_send == TERMINATE_SIG);
}

/// Bring up `threads` server-side UD queue pairs, round-robining them over
/// the `nic_num` RDMA devices starting at index `nic_idx`, and return the
/// QPs together with the endpoint metadata (gid/lid/qpn/qkey) that clients
/// need in order to address them.
// NOTE(review): generic parameters were stripped from this dump; the return
// type is reconstructed as (Vec<Arc<QueuePair>>, Vec<UdMeta>) to match the
// values pushed below -- confirm against the original source.
pub fn bootstrap_ud_server(
    threads: usize,
    nic_idx: usize,
    nic_num: usize,
) -> (Vec<Arc<QueuePair>>, Vec<UdMeta>) {
    let mut server_qps = Vec::new();
    let mut server_metas = Vec::new();

    for tid in 0..threads {
        // Spread threads across the available NICs round-robin.
        let ctx = UDriver::create()
            .expect("failed to query device")
            .devices()
            .get(nic_idx + (tid % nic_num))
            .expect("no rdma device available")
            .open_context()
            .expect("failed to create RDMA context");

        let server_qp = QueuePairBuilder::new(&ctx)
            .build_ud()
            .expect("failed to build UD QP")
            .bring_up_ud()
            .expect("failed to bring up UD QP");
        let server_meta = UdMeta {
            gid: server_qp.gid().unwrap(),
            lid: server_qp.lid().unwrap(),
            qpn: server_qp.qp_num(),
            qkey: server_qp.qkey(),
        };
        server_qps.push(server_qp);
        server_metas.push(server_meta);
    }
    (server_qps, server_metas)
}

/// Pack a (client_id, thread_id) pair into one u32 immediate value:
/// client_id in the high 16 bits, thread_id in the low 16 bits.
///
/// The thread id is masked to 16 bits so that an oversized value can never
/// corrupt the client-id bits; `decode_id` applies the same mask, so the
/// two functions now round-trip for every input.
pub fn encode_id(
    client_id: u32,
    thread_id: u32
) -> u32
{
    (client_id << 16) | (thread_id & 0xffff)
}

/// Inverse of `encode_id`: split an immediate value back into
/// (client_id, thread_id).
pub fn decode_id(
    imm_data: u32
) -> (u32, u32) {
    let client_id: u32 = imm_data >> 16;
    let thread_id: u32 = imm_data & (0xffff);
    (client_id, thread_id)
}
-------------------------------------------------------------------------------- /benchs/doca_dma/src/bootstrap/client_construct.rs: --------------------------------------------------------------------------------
use std::sync::Arc;
use std::ptr::{ NonNull, null_mut };
3 | use std::time::Duration; 4 | use std::net::SocketAddr; 5 | 6 | use doca::open_device_with_pci; 7 | use doca::dma::{ DOCAContext, DOCADMAJob }; 8 | use doca::{ DOCAError, RawPointer, RawPointerMsg, DOCAResult, LoadedInfo, DOCABuffer, DOCARegisteredMemory, DOCAMmap, BufferInventory, DOCAWorkQueue, DMAEngine }; 9 | 10 | use tokio::net::{ TcpListener }; 11 | use tokio::io::{ AsyncReadExt, AsyncWriteExt }; 12 | use tokio::time::timeout; 13 | use tokio::runtime::Runtime; 14 | 15 | use rand_chacha::ChaCha8Rng; 16 | use rand_chacha::rand_core::SeedableRng; 17 | 18 | use bench_util::doca::args::CmdlineArgs; 19 | use bench_util::round_up; 20 | 21 | use crate::bootstrap::*; 22 | 23 | use netbencher_core::*; 24 | 25 | use nix::libc::*; 26 | 27 | use log::*; 28 | 29 | pub async fn recv_doca_config(addr: SocketAddr) -> Vec { 30 | let mut conn_info = [0u8; DOCA_MAX_CONN_LENGTH]; 31 | let mut conn_info_len = 0; 32 | /* receive the DOCA buffer message from the host */ 33 | let listener = TcpListener::bind(addr).await.unwrap(); 34 | loop { 35 | if let Ok(res) = timeout(Duration::from_secs(1), listener.accept()).await { 36 | let (mut stream, _) = res.unwrap(); 37 | conn_info_len = stream.read(&mut conn_info).await.unwrap(); 38 | break; 39 | } 40 | } 41 | 42 | conn_info[0..conn_info_len].to_vec() 43 | } 44 | 45 | fn load_doca_config(thread_id: usize, doca_conn: &DocaConnInfo) -> DOCAResult { 46 | /* parse the received messages */ 47 | let dev_id = thread_id % doca_conn.exports.len(); 48 | let buf_id = thread_id % doca_conn.buffers.len(); 49 | let mut export_desc_buffer = doca_conn.exports[dev_id].to_vec().into_boxed_slice(); 50 | let export_payload = export_desc_buffer.len(); 51 | Ok(LoadedInfo { 52 | export_desc: RawPointer { 53 | inner: NonNull::new(Box::into_raw(export_desc_buffer) as *mut _).unwrap(), 54 | payload: export_payload, 55 | }, 56 | remote_addr: doca_conn.buffers[buf_id], 57 | }) 58 | } 59 | 60 | #[inline] 61 | fn post_dma_reqs 62 | ( 63 | thread_id: usize, 64 | 
runner: Arc>, 65 | mut stat: Arc, 66 | args: CmdlineArgs, 67 | mut workq: Arc>, 68 | local_buf: DOCABuffer, 69 | remote_buf: DOCABuffer, 70 | ) 71 | where T: Send + 'static + Sync + Copy 72 | { 73 | let mut src_buf_len = 0; 74 | let mut dst_buf_len = 0; 75 | let (src_buf, dst_buf) = match args.read { 76 | true => { 77 | src_buf_len = args.random_space as usize; 78 | dst_buf_len = args.local_mr as usize; 79 | (remote_buf, local_buf) 80 | } 81 | false => { 82 | src_buf_len = args.local_mr as usize; 83 | dst_buf_len = args.random_space as usize; 84 | (local_buf, remote_buf) 85 | } 86 | }; 87 | 88 | let mut dma_job = workq.create_dma_job(src_buf, dst_buf); 89 | 90 | /* the testing logic of */ 91 | let mut rand = ChaCha8Rng::seed_from_u64( 92 | ((0xdeadbeaf + 73 * thread_id) as u64) + args.client_id * 37 93 | ); 94 | while runner.running() { 95 | let mut start = 0; 96 | /* post dma requests */ 97 | for i in 0..args.batch_size { 98 | let (src_offset, dst_offset) = match args.read { 99 | true => { 100 | (args.get_next_index(thread_id,&mut rand) as usize, start as usize) 101 | }, 102 | false => { 103 | (start as usize, args.get_next_index(thread_id,&mut rand) as usize) 104 | } 105 | }; 106 | 107 | dma_job.set_src_data(src_offset, args.payload as usize); 108 | dma_job.set_dst_data(dst_offset, args.payload as usize); 109 | start += args.payload; 110 | unsafe { 111 | Arc::get_mut_unchecked(&mut workq).submit(&dma_job).expect("failed to submit the job"); 112 | } 113 | } 114 | 115 | /* retrieve dma job results */ 116 | for i in 0..args.batch_size { 117 | loop { 118 | let event = unsafe { 119 | Arc::get_mut_unchecked(&mut workq).poll_completion() 120 | }; 121 | match event { 122 | Ok(_e) => { 123 | break; 124 | } 125 | Err(e) => { 126 | if e == DOCAError::DOCA_ERROR_AGAIN { 127 | continue; 128 | } else { 129 | panic!("Job failed! 
{:?}", e); 130 | } 131 | } 132 | } 133 | } 134 | } 135 | 136 | unsafe { 137 | Arc::get_mut_unchecked(&mut stat).finished_batch_ops(args.batch_size.try_into().unwrap()); 138 | } 139 | } 140 | } 141 | 142 | pub fn perform_client_routine( 143 | thread_id: usize, 144 | runner: Arc>, 145 | stat: Arc, 146 | conn: Vec, 147 | mut args: CmdlineArgs 148 | ) 149 | where T: Send + 'static + Sync + Copy 150 | { 151 | let doca_conn = DocaConnInfo::deserialize(conn.as_slice()); 152 | let remote_config = load_doca_config(thread_id, &doca_conn).unwrap(); 153 | debug!( 154 | "Check export len {}, remote len {}, remote addr {:?}", 155 | remote_config.export_desc.payload, 156 | remote_config.remote_addr.payload, 157 | remote_config.remote_addr.inner.as_ptr() 158 | ); 159 | 160 | /* allocate local buffer */ 161 | // FIXME: do we need larger buffer for more test scenes? 162 | args.local_mr = args.batch_size as u64 * args.payload; 163 | let mut local_buffer = vec![0u8; args.local_mr as usize].into_boxed_slice(); 164 | let local_region = RawPointer { 165 | inner: match args.huge_page { 166 | false => { 167 | NonNull::new(local_buffer.as_mut_ptr() as *mut _).unwrap() 168 | } 169 | true => { 170 | let capacity = round_up(args.local_mr, 2 << 20); 171 | let data = unsafe { 172 | mmap( 173 | null_mut(), 174 | capacity as size_t, 175 | PROT_READ | PROT_WRITE, 176 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | MAP_HUGETLB, 177 | -1, 178 | 0 179 | ) 180 | }; 181 | 182 | if data == MAP_FAILED { 183 | panic!("Failed to create huge-page MR"); 184 | } 185 | NonNull::new(data).unwrap() 186 | } 187 | }, 188 | payload: args.local_mr as usize, 189 | }; 190 | /* init DOCA core objects */ 191 | let device = open_device_with_pci(args.pci_dev[0].as_str()).unwrap(); 192 | let mut doca_mmap = Arc::new(DOCAMmap::new().unwrap()); 193 | unsafe { 194 | Arc::get_mut_unchecked(&mut doca_mmap).add_device(&device).unwrap(); 195 | } 196 | 197 | let dma = DMAEngine::new().unwrap(); 198 | let ctx = DOCAContext::new(&dma, 
vec![device.clone()]).unwrap(); 199 | /* work queue depth = batch_size */ 200 | let workq = DOCAWorkQueue::new(args.batch_size.try_into().unwrap(), &ctx).unwrap(); 201 | 202 | let remote_mmap = Arc::new( 203 | DOCAMmap::new_from_export(remote_config.export_desc, &device).unwrap() 204 | ); 205 | 206 | /* register remote doca buffer to the inventory */ 207 | let inv = BufferInventory::new(1024).unwrap(); 208 | let remote_dma_buf = DOCARegisteredMemory::new_from_remote( 209 | &remote_mmap, 210 | remote_config.remote_addr 211 | ) 212 | .unwrap() 213 | .to_buffer(&inv) 214 | .unwrap(); 215 | 216 | /* register local doca buffer to the inventory */ 217 | let local_dma_buf = DOCARegisteredMemory::new(&doca_mmap, local_region) 218 | .unwrap() 219 | .to_buffer(&inv) 220 | .unwrap(); 221 | 222 | post_dma_reqs(thread_id, runner.clone(), stat.clone(), args, Arc::new(workq), local_dma_buf, remote_dma_buf); 223 | } -------------------------------------------------------------------------------- /benchs/two_sided_rdma/src/bootstrap/server_construct.rs: -------------------------------------------------------------------------------- 1 | use std::sync::{ Arc, RwLock }; 2 | use std::borrow::Borrow; 3 | use std::collections::HashMap; 4 | use bench_util::*; 5 | use bench_util::args::*; 6 | use bench_util::doorbell::{ UdDoorbellHelper, RecvDoorbellHelper }; 7 | use bench_util::ud_endpoint::*; 8 | use bench_util::ud_message::*; 9 | 10 | use netbencher_core::*; 11 | 12 | use KRdmaKit::rdma_shim::bindings::*; 13 | use KRdmaKit::{ MemoryRegion, QueuePair, DatagramEndpoint }; 14 | 15 | use log::*; 16 | 17 | pub fn perform_server_routine( 18 | runner: Arc>, 19 | qp: Arc, 20 | conn_meta: Arc>>>, 21 | args: CmdlineArgs 22 | ) 23 | where T: Send + 'static + Sync + Copy 24 | { 25 | let ctx = qp.ctx(); 26 | let mut ud_buffer = UdBuffer::new(MAX_FLYING_MSG, MAX_MSG_SZ); 27 | let region_size = ud_buffer.get_region_size(); 28 | let (send_mr, recv_mr) = if args.huge_page { 29 | ( 30 | 
MemoryRegion::new_huge_page(ctx.clone(), region_size as _).expect( 31 | "Failed to allocate hugepage MR for send buffer" 32 | ), 33 | MemoryRegion::new_huge_page(ctx.clone(), region_size as _).expect( 34 | "Failed to allocate hugepage MR for send buffer" 35 | ), 36 | ) 37 | } else { 38 | ( 39 | MemoryRegion::new(ctx.clone(), region_size as _).expect( 40 | "Failed to allocate MR for send buffer" 41 | ), 42 | MemoryRegion::new(ctx.clone(), region_size as _).expect( 43 | "Fail to allocate MR for recv buffer" 44 | ), 45 | ) 46 | }; 47 | 48 | let mut recv_doorbell = RecvDoorbellHelper::create(MAX_RECV_NUM, qp.clone()); 49 | for wr_id in 0..MAX_FLYING_MSG { 50 | let start = ud_buffer.get_start_addr(); 51 | recv_doorbell 52 | .post_recv(&recv_mr, start..start + MAX_MSG_SZ, wr_id) 53 | .expect("recv should succ"); 54 | } 55 | 56 | let mut completions = [Default::default(); MAX_FLYING_MSG as usize]; 57 | // cache each client-thread's qp endpoint message to avoid fetch read lock every time 58 | let mut endpoint_cache = HashMap::>::new(); 59 | let batch_or_not = if args.latency_test { 1 } else { args.signal_size }; 60 | let mut pending = 0; // pending unsignaled send requests 61 | 62 | /* payload for reply */ 63 | let payload = align_to_cacheline(0); 64 | // each loop will recv cqs and post replies 65 | while runner.running() { 66 | let recv_comps = qp.poll_recv_cq(&mut completions).unwrap(); 67 | for wc in recv_comps { 68 | let signal = pending == 0; 69 | let wr_id = wc.wr_id; 70 | 71 | #[cfg(feature = "OFED_5_4")] 72 | let ep_id = unsafe { *wc.__bindgen_anon_1.imm_data.as_ref() }; 73 | 74 | #[cfg(not(feature = "OFED_5_4"))] 75 | let ep_id = wc.imm_data; 76 | 77 | let (client_id, client_tid) = decode_id(ep_id); 78 | let endpoint = match endpoint_cache.get(&ep_id) { 79 | None => { 80 | let client_meta = conn_meta 81 | .read() 82 | .unwrap() 83 | .get(&client_id) 84 | .unwrap() 85 | .get(client_tid as usize) 86 | .unwrap() 87 | .clone(); 88 | // create the cache entry 89 | let 
new_endpoint = Arc::new( 90 | DatagramEndpoint::new( 91 | qp.ctx(), 92 | 1, 93 | client_meta.lid, 94 | client_meta.gid, 95 | client_meta.qpn, 96 | client_meta.qkey 97 | ).unwrap() 98 | ); 99 | endpoint_cache.insert(ep_id, new_endpoint); 100 | endpoint_cache.get(&ep_id).unwrap().borrow() 101 | } 102 | Some(old_endpoint) => { old_endpoint.borrow() } 103 | }; 104 | let start = ud_buffer.get_start_addr(); 105 | 106 | /* reply to client */ 107 | qp.post_datagram(endpoint, &send_mr, start..start + payload, wr_id, signal).expect( 108 | "send should succeed" 109 | ); 110 | pending += 1; 111 | recv_doorbell 112 | .post_recv(&recv_mr, start..start + MAX_MSG_SZ, wr_id) 113 | .expect("recv should succ"); 114 | 115 | if pending >= batch_or_not { 116 | let mut ok = false; 117 | let mut completions = [Default::default()]; 118 | while !ok { 119 | let ret = qp.poll_send_cq(&mut completions).expect("Failed to poll send cq"); 120 | if ret.len() > 0 { 121 | if ret[0].status != 0 { 122 | panic!("cq status: {}", ret[0].status); 123 | } 124 | ok = true; 125 | } else { 126 | } 127 | } 128 | pending = 0; 129 | } 130 | } 131 | } 132 | } 133 | 134 | pub fn perform_server_doorbell_routine( 135 | runner: Arc>, 136 | qp: Arc, 137 | conn_meta: Arc>>>, 138 | args: CmdlineArgs 139 | ) 140 | where T: Send + 'static + Sync + Copy 141 | { 142 | let ctx = qp.ctx(); 143 | let mut ud_buffer = UdBuffer::new(MAX_FLYING_MSG, MAX_MSG_SZ); 144 | let region_size = ud_buffer.get_region_size(); 145 | let (send_mr, recv_mr) = if args.huge_page { 146 | ( 147 | MemoryRegion::new_huge_page(ctx.clone(), region_size as _).expect( 148 | "Failed to allocate hugepage MR for send buffer" 149 | ), 150 | MemoryRegion::new_huge_page(ctx.clone(), region_size as _).expect( 151 | "Failed to allocate hugepage MR for send buffer" 152 | ), 153 | ) 154 | } else { 155 | ( 156 | MemoryRegion::new(ctx.clone(), region_size as _).expect( 157 | "Failed to allocate MR for send buffer" 158 | ), 159 | MemoryRegion::new(ctx.clone(), 
region_size as _).expect( 160 | "Fail to allocate MR for recv buffer" 161 | ), 162 | ) 163 | }; 164 | 165 | let mut ud_doorbell = UdDoorbellHelper::create( 166 | args.db_size, 167 | ibv_wr_opcode::IBV_WR_SEND, 168 | qp.clone() 169 | ); 170 | let mut recv_doorbell = RecvDoorbellHelper::create(MAX_RECV_NUM, qp.clone()); 171 | for wr_id in 0..MAX_FLYING_MSG { 172 | let start = ud_buffer.get_start_addr(); 173 | recv_doorbell 174 | .post_recv(&recv_mr, start..start + MAX_MSG_SZ, wr_id) 175 | .expect("recv should succ"); 176 | } 177 | 178 | let mut completions = [Default::default(); MAX_FLYING_MSG as usize]; 179 | // cache each client-thread's qp endpoint message to avoid fetch read lock every time 180 | let mut endpoint_cache = HashMap::>::new(); 181 | let batch_or_not = if args.latency_test { 1 } else { args.signal_size }; 182 | let mut pending = 0; // pending unsignaled send requests 183 | 184 | /* payload for reply */ 185 | let payload = align_to_cacheline(0); 186 | // each loop will recv cqs and post replies w/ doorbell 187 | while runner.running() { 188 | let recv_comps = qp.poll_recv_cq(&mut completions).unwrap(); 189 | for wc in recv_comps { 190 | let signal = pending == 0; 191 | let wr_id = wc.wr_id; 192 | 193 | #[cfg(feature = "OFED_5_4")] 194 | let ep_id = unsafe { *wc.__bindgen_anon_1.imm_data.as_ref() }; 195 | 196 | #[cfg(not(feature = "OFED_5_4"))] 197 | let ep_id = wc.imm_data; 198 | 199 | let (client_id, client_tid) = decode_id(ep_id); 200 | let endpoint = match endpoint_cache.get(&ep_id) { 201 | None => { 202 | let client_meta = conn_meta 203 | .read() 204 | .unwrap() 205 | .get(&client_id) 206 | .unwrap() 207 | .get(client_tid as usize) 208 | .unwrap() 209 | .clone(); 210 | // create the cache entry 211 | let new_endpoint = Arc::new( 212 | DatagramEndpoint::new( 213 | qp.ctx(), 214 | 1, 215 | client_meta.lid, 216 | client_meta.gid, 217 | client_meta.qpn, 218 | client_meta.qkey 219 | ).unwrap() 220 | ); 221 | endpoint_cache.insert(ep_id, new_endpoint); 
222 | endpoint_cache.get(&ep_id).unwrap().borrow() 223 | } 224 | Some(old_endpoint) => { old_endpoint.borrow() } 225 | }; 226 | let start = ud_buffer.get_start_addr(); 227 | ud_doorbell 228 | .post_send(endpoint, &send_mr, start..start + payload, wr_id, None, signal) 229 | .expect("send should succeed"); 230 | pending += 1; 231 | recv_doorbell 232 | .post_recv(&recv_mr, start..start + MAX_MSG_SZ, wr_id) 233 | .expect("recv should succ"); 234 | 235 | if pending >= batch_or_not { 236 | let mut ok = false; 237 | let mut completions = [Default::default()]; 238 | while !ok { 239 | let ret = qp.poll_send_cq(&mut completions).expect("Failed to poll send cq"); 240 | if ret.len() > 0 { 241 | if ret[0].status != 0 { 242 | error!("cq status: {}", ret[0].status); 243 | } 244 | ok = true; 245 | } 246 | } 247 | pending = 0; 248 | } 249 | } 250 | } 251 | } -------------------------------------------------------------------------------- /benchs/one_sided_rdma/src/bootstrap/client_construct.rs: -------------------------------------------------------------------------------- 1 | use std::sync::{ Arc }; 2 | use bench_util::args::*; 3 | use bench_util::doorbell::RcDoorbellHelper; 4 | 5 | use rand_chacha::rand_core::SeedableRng; 6 | use rand_chacha::ChaCha8Rng; 7 | 8 | use netbencher_core::*; 9 | 10 | use KRdmaKit::rdma_shim::bindings::*; 11 | 12 | use log::*; 13 | 14 | pub fn perform_client_routine( 15 | thread_id: usize, 16 | runner: Arc>, 17 | mut stat: Arc, 18 | args: CmdlineArgs 19 | ) 20 | where T: Send + 'static + Sync + Copy 21 | { 22 | let batch_or_not = if args.latency_test { 1 } else { args.signal_size }; 23 | let (qp, client_mr, server_meta) = match args.create_rc(thread_id) { 24 | Ok(res) => res, 25 | Err(()) => { panic!("Fail to bring up RC qp!") } 26 | }; 27 | let mut rand = ChaCha8Rng::seed_from_u64( 28 | ((0xdeadbeaf + 73 * thread_id) as u64) + args.client_id * 37 29 | ); 30 | let mut completions = [Default::default()]; 31 | 32 | let mut pending: usize = 0; 33 | let 
start = 0; 34 | 35 | while runner.running() { 36 | std::sync::atomic::compiler_fence(std::sync::atomic::Ordering::Release); 37 | let mut start = 0; 38 | for i in 0..args.factor { 39 | let index = args.get_next_index(thread_id, &mut rand); 40 | let signal = pending == 0; 41 | if args.read { 42 | qp.post_send_read( 43 | &client_mr, 44 | start..start + args.payload, 45 | signal, 46 | server_meta.addr + index, 47 | server_meta.rkey, 48 | i 49 | ).expect("read should succeeed"); 50 | } else { 51 | qp.post_send_write( 52 | &client_mr, 53 | start..start + args.payload, 54 | signal, 55 | server_meta.addr + index, 56 | server_meta.rkey, 57 | i 58 | ).expect("read should succeeed"); 59 | } 60 | start += args.payload; 61 | pending += 1; 62 | if pending >= batch_or_not { 63 | let mut ok = false; 64 | while !ok { 65 | let ret = qp.poll_send_cq(&mut completions).expect("Failed to poll cq"); 66 | if ret.len() > 0 { 67 | if ret[0].status != 0 { 68 | error!("read remote addr: {:?} err: {}", index, ret[0].status); 69 | } 70 | assert_eq!(ret[0].status, 0); 71 | ok = true; 72 | } 73 | } 74 | pending = 0; 75 | } 76 | } 77 | unsafe { 78 | Arc::get_mut_unchecked(&mut stat).finished_batch_ops(args.factor); 79 | } 80 | } // end of main benchmark loop 81 | } 82 | 83 | pub fn perform_client_doorbell_routine( 84 | thread_id: usize, 85 | runner: Arc>, 86 | mut stat: Arc, 87 | args: CmdlineArgs 88 | ) 89 | where T: Send + 'static + Sync + Copy 90 | { 91 | let batch_or_not = if args.latency_test { 1 } else { args.signal_size }; 92 | let (qp, client_mr, server_meta) = match args.create_rc(thread_id) { 93 | Ok(res) => res, 94 | Err(()) => { panic!("Fail to bring up RC qp!") } 95 | }; 96 | let mut rand = ChaCha8Rng::seed_from_u64( 97 | ((0xdeadbeaf + 73 * thread_id) as u64) + args.client_id * 37 98 | ); 99 | 100 | let mut completions = [Default::default()]; 101 | let mut pending: usize = 0; 102 | let mut rc_doorbell = RcDoorbellHelper::create(args.db_size, qp.clone()); 103 | if args.read { 104 | 
rc_doorbell.init(ibv_wr_opcode::IBV_WR_RDMA_READ); 105 | } else { 106 | rc_doorbell.init(ibv_wr_opcode::IBV_WR_RDMA_WRITE); 107 | } 108 | 109 | while runner.running() { 110 | std::sync::atomic::compiler_fence(std::sync::atomic::Ordering::Release); 111 | let mut start = 0; 112 | for i in 0..args.factor { 113 | let index = args.get_next_index(thread_id, &mut rand); 114 | // start = (start + std::cmp::max(PAYLOAD, 64)) % ((LOCAL_MR - PAYLOAD) as u64); 115 | let signal = pending == 0; 116 | 117 | rc_doorbell 118 | .post_send( 119 | &client_mr, 120 | start..start + args.payload, 121 | signal, 122 | server_meta.addr + index, 123 | server_meta.rkey, 124 | i 125 | ) 126 | .expect("read should succeeed"); 127 | 128 | start += args.payload; 129 | pending += 1; 130 | if pending >= batch_or_not { 131 | let mut ok = false; 132 | while !ok { 133 | let ret = qp.poll_send_cq(&mut completions).expect("Failed to poll cq"); 134 | if ret.len() > 0 { 135 | if ret[0].status != 0 { 136 | error!("read remote addr: {:?} err", index); 137 | } 138 | assert_eq!(ret[0].status, 0); 139 | ok = true; 140 | } 141 | } 142 | pending = 0; 143 | } 144 | } 145 | unsafe { 146 | Arc::get_mut_unchecked(&mut stat).finished_batch_ops(args.factor); 147 | } 148 | } 149 | } 150 | 151 | pub fn perform_client_signaled_routine( 152 | thread_id: usize, 153 | runner: Arc>, 154 | mut stat: Arc, 155 | args: CmdlineArgs 156 | ) 157 | where T: Send + 'static + Sync + Copy 158 | { 159 | let (qp, client_mr, server_meta) = match args.create_rc(thread_id) { 160 | Ok(res) => res, 161 | Err(()) => { panic!("Fail to bring up RC qp!") } 162 | }; 163 | let mut rand = ChaCha8Rng::seed_from_u64( 164 | ((0xdeadbeaf + 73 * thread_id) as u64) + args.client_id * 37 165 | ); 166 | let mut completions = [Default::default()]; 167 | 168 | while runner.running() { 169 | std::sync::atomic::compiler_fence(std::sync::atomic::Ordering::Release); 170 | let mut start = 0; 171 | for i in 0..args.factor { 172 | let index = 
args.get_next_index(thread_id, &mut rand); 173 | // start = (start + std::cmp::max(PAYLOAD, 64)) % ((LOCAL_MR - PAYLOAD) as u64); 174 | if args.read { 175 | qp.post_send_read( 176 | &client_mr, 177 | start..start + args.payload, 178 | true, 179 | server_meta.addr + index, 180 | server_meta.rkey, 181 | i 182 | ).expect("read should succeeed"); 183 | } else { 184 | qp.post_send_write( 185 | &client_mr, 186 | start..start + args.payload, 187 | true, 188 | server_meta.addr + index, 189 | server_meta.rkey, 190 | i 191 | ).expect("read should succeeed"); 192 | } 193 | 194 | start += args.payload; 195 | } 196 | 197 | for _ in 0..args.factor { 198 | let mut ok = false; 199 | while !ok { 200 | let ret = qp.poll_send_cq(&mut completions).expect("Failed to poll cq"); 201 | if ret.len() > 0 { 202 | if ret[0].status != 0 { 203 | } 204 | assert_eq!(ret[0].status, 0); 205 | ok = true; 206 | } 207 | } 208 | } 209 | unsafe { 210 | Arc::get_mut_unchecked(&mut stat).finished_batch_ops(args.factor); 211 | } 212 | } // end of main benchmark loop 213 | } 214 | 215 | pub fn perform_client_doorbell_signaled_routine( 216 | thread_id: usize, 217 | runner: Arc>, 218 | mut stat: Arc, 219 | args: CmdlineArgs 220 | ) 221 | where T: Send + 'static + Sync + Copy 222 | { 223 | let batch_or_not = 1; 224 | let (qp, client_mr, server_meta) = match args.create_rc(thread_id) { 225 | Ok(res) => res, 226 | Err(()) => { panic!("Fail to bring up RC qp!") } 227 | }; 228 | let mut rand = ChaCha8Rng::seed_from_u64( 229 | ((0xdeadbeaf + 73 * thread_id) as u64) + args.client_id * 37 230 | ); 231 | 232 | let mut completions = [Default::default()]; 233 | let mut pending: usize = 0; 234 | let mut rc_doorbell = RcDoorbellHelper::create(args.db_size, qp.clone()); 235 | if args.read { 236 | rc_doorbell.init(ibv_wr_opcode::IBV_WR_RDMA_READ); 237 | } else { 238 | rc_doorbell.init(ibv_wr_opcode::IBV_WR_RDMA_WRITE); 239 | } 240 | 241 | while runner.running() { 242 | 
std::sync::atomic::compiler_fence(std::sync::atomic::Ordering::Release); 243 | let mut start = 0; 244 | for i in 0..args.factor { 245 | let index = args.get_next_index(thread_id, &mut rand); 246 | // start = (start + std::cmp::max(PAYLOAD, 64)) % ((LOCAL_MR - PAYLOAD) as u64); 247 | let signal = pending == 0; 248 | 249 | rc_doorbell 250 | .post_send( 251 | &client_mr, 252 | start..start + args.payload, 253 | signal, 254 | server_meta.addr + index, 255 | server_meta.rkey, 256 | i 257 | ) 258 | .expect("read should succeeed"); 259 | start += args.payload; 260 | pending += 1; 261 | if pending >= batch_or_not { 262 | let mut ok = false; 263 | while !ok { 264 | let ret = qp.poll_send_cq(&mut completions).expect("Failed to poll cq"); 265 | if ret.len() > 0 { 266 | if ret[0].status != 0 { 267 | error!("read remote addr: {:?} err", index); 268 | } 269 | assert_eq!(ret[0].status, 0); 270 | ok = true; 271 | } 272 | } 273 | pending = 0; 274 | } 275 | } 276 | unsafe { 277 | Arc::get_mut_unchecked(&mut stat).finished_batch_ops(args.factor); 278 | } 279 | } 280 | } -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | 2 | user=xxx 3 | passwd=xxx 4 | 5 | generator=scripts/toml_generator.py 6 | runner=scripts/toml_runner.py 7 | 8 | # arg1: template filepath 9 | # arg2: output directory 10 | # arg3: toml arguments dictory 11 | # arg4: evaluation logs directory 12 | 13 | # Run one command in one second in .toml 14 | define sep_bench 15 | mkdir -p $(2) $(4) 16 | python3 $(generator) -f $(1) -o $(2) -d $(3) 17 | python3 $(runner) -i $(2) -l $(4) -u $(user) -p $(passwd) -s True 18 | endef 19 | 20 | # Run all commands in .toml simutaneously 21 | define bench 22 | mkdir -p $(2) $(4) 23 | python3 $(generator) -f $(1) -o $(2) -d $(3) 24 | python3 $(runner) -i $(2) -l $(4) -u $(user) -p $(passwd) -s False 25 | endef 26 | 27 | 28 | # clients=['val00', 'val01', 'val02', 
'val03']
# NOTE(review): the assignment ending above is truncated in this chunk; its
# left-hand side is defined earlier in the file.

# ---- Cluster topology ------------------------------------------------------
clients=['val00', 'val01']
bonus_clients=['val02', 'val03']
lat_client=['val00']
rdma_server=['pro2']
rdma_srv_ip='192.168.12.10'
snic_server=['pro1']
snic_srv_ip='192.168.12.138'
soc=['snic-pro1']
soc_ip='192.168.12.114'
# NIC index used on each machine type.
rdma_server_nic=1
snic_server_nic=0
client_nic=0
# Remote directory under which the benchmark trees are deployed.
parent='~/rocc'

# Repository roots on the x86 hosts and on the SmartNIC SoC.
x86_root='smartnic-bench'
soc_root='smartnic-bench/soc'

# ---- Machine groups handed to the bench driver -----------------------------
rdma_machines={'server': ${rdma_server}, 'client': ${clients}}
rdma_lat_machines={'server': ${rdma_server}, 'client': ${lat_client}}
snic_machines={'server': ${snic_server}, 'client': ${clients}}
snic_lat_machines={'server': ${snic_server}, 'client': ${lat_client}}
soc_machines={'server': ${soc}, 'client': ${clients}}
soc_lat_machines={'server': ${soc}, 'client': ${lat_client}}
soc_intranode_machines={'server': ${soc}, 'client': ${snic_server}}
snic_1_2_machines={'server': ${snic_server}, 'soc': ${soc}, \
    'host_client': ${clients}, 'soc_client': ${bonus_clients}}
snic_1_3_machines={'server': ${snic_server}, 'soc': ${soc}, \
    'client': ${clients}}

thpt_lat_results=throughput_latency/scripts/results
# Payload sizes (bytes) swept by every experiment.
payloads=[0,16,32,64,128,256,512,1024]

# ---- TOML templates, one per experiment kind -------------------------------
tem_read_thpt=throughput_latency/scripts/templates/thpt/read.toml
tem_read_lat=throughput_latency/scripts/templates/lat/read.toml
tem_1_2_read_thpt=throughput_latency/scripts/templates/thpt/1_2_read.toml
tem_1_3_read_thpt=throughput_latency/scripts/templates/thpt/1_3_read.toml
tem_write_thpt=throughput_latency/scripts/templates/thpt/write.toml
tem_write_lat=throughput_latency/scripts/templates/lat/write.toml
tem_1_2_write_thpt=throughput_latency/scripts/templates/thpt/1_2_write.toml
tem_1_3_write_thpt=throughput_latency/scripts/templates/thpt/1_3_write.toml
tem_rpc_thpt=throughput_latency/scripts/templates/thpt/rpc.toml
tem_rpc_lat=throughput_latency/scripts/templates/lat/rpc.toml
tem_1_2_rpc_thpt=throughput_latency/scripts/templates/thpt/1_2_rpc.toml
tem_1_3_rpc_thpt=throughput_latency/scripts/templates/thpt/1_3_rpc.toml

# ---- Argument bundles (python-dict strings) passed to $(call bench, ...) ----
rdma_args="{'hosts': ${rdma_machines}, \
    'placeholder': {'payload': ${payloads}, \
    'server_ip': ${rdma_srv_ip}, \
    'server_root': ${x86_root}, \
    'server_nic': ${rdma_server_nic}, \
    'client_root': ${x86_root}, \
    'client_nic': ${client_nic}}, \
    'path': ${parent}}"
rdma_lat_args="{'hosts': ${rdma_lat_machines}, \
    'placeholder': {'payload': ${payloads}, \
    'server_ip': ${rdma_srv_ip}, \
    'server_root': ${x86_root}, \
    'server_nic': ${rdma_server_nic}, \
    'client_root': ${x86_root}, \
    'client_nic': ${client_nic}}, \
    'path': ${parent}}"

snic_1_args="{'hosts': ${snic_machines}, \
    'placeholder': {'payload': ${payloads}, \
    'server_ip': ${snic_srv_ip}, \
    'server_root': ${x86_root}, \
    'server_nic': ${snic_server_nic}, \
    'client_root': ${x86_root}, \
    'client_nic': ${client_nic}}, \
    'path': ${parent}}"
snic_1_lat_args="{'hosts': ${snic_lat_machines}, \
    'placeholder': {'payload': ${payloads}, \
    'server_ip': ${snic_srv_ip}, \
    'server_root': ${x86_root}, \
    'server_nic': ${snic_server_nic}, \
    'client_root': ${x86_root}, \
    'client_nic': ${client_nic}}, \
    'path': ${parent}}"

snic_2_args="{'hosts': ${soc_machines}, \
    'placeholder': {'payload': ${payloads}, \
    'server_ip': ${soc_ip}, \
    'server_root': ${soc_root}, \
    'server_nic': ${snic_server_nic}, \
    'client_root': ${x86_root}, \
    'client_nic': ${client_nic}}, \
    'path': ${parent}}"
snic_2_lat_args="{'hosts': ${soc_lat_machines}, \
    'placeholder': {'payload': ${payloads}, \
    'server_ip': ${soc_ip}, \
    'server_root': ${soc_root}, \
    'server_nic': ${snic_server_nic}, \
    'client_root': ${x86_root}, \
    'client_nic': ${client_nic}}, \
    'path': ${parent}}"

snic_3_args="{'hosts': ${soc_intranode_machines}, \
    'placeholder': {'payload': ${payloads}, \
    'server_ip': ${soc_ip}, \
    'server_root': ${soc_root}, \
    'server_nic': ${snic_server_nic}, \
    'client_root': ${x86_root}, \
    'client_nic': ${client_nic}}, \
    'path': ${parent}}"

# FIX(review): removed a stray trailing quote after ${x86_root} on the
# 'soc_client_root' entry; it unbalanced the quoting of the generated dict.
snic_1_2_args="{'hosts': ${snic_1_2_machines}, \
    'placeholder': {'payload': ${payloads}, \
    'server_ip': ${snic_srv_ip}, \
    'server_root': ${x86_root}, \
    'server_nic': ${snic_server_nic}, \
    'soc_ip': ${soc_ip}, \
    'soc_root': ${soc_root}, \
    'soc_nic': 0, \
    'host_client_root': ${x86_root}, \
    'soc_client_root': ${x86_root}, \
    'client_nic': ${client_nic}}, \
    'path': ${parent}}"

snic_1_3_args="{'hosts': ${snic_1_3_machines}, \
    'placeholder': {'payload': ${payloads}, \
    'server_ip': ${snic_srv_ip}, \
    'server_root': ${x86_root}, \
    'server_nic': ${snic_server_nic}, \
    'soc_ip': ${soc_ip}, \
    'soc_root': ${soc_root}, \
    'soc_nic': 0, \
    'client_root': ${x86_root}, \
    'client_nic': ${client_nic}}, \
    'path': ${parent}}"

# ---- READ experiments ------------------------------------------------------
rdma_read_thpt:
	$(call bench, \
	${tem_read_thpt}, \
	${thpt_lat_results}/rdma_read_thpt, \
	${rdma_args}, \
	${thpt_lat_results}/rdma_read_thpt/logs)
rdma_read_lat:
	$(call bench, \
	${tem_read_lat}, \
	${thpt_lat_results}/rdma_read_lat, \
	${rdma_lat_args}, \
	${thpt_lat_results}/rdma_read_lat/logs)

snic_1_read_thpt:
	$(call bench, \
	${tem_read_thpt}, \
	${thpt_lat_results}/snic_1_read_thpt, \
	${snic_1_args}, \
	${thpt_lat_results}/snic_1_read_thpt/logs)
snic_1_read_lat:
	$(call bench, \
	${tem_read_lat}, \
	${thpt_lat_results}/snic_1_read_lat, \
	${snic_1_lat_args}, \
	${thpt_lat_results}/snic_1_read_lat/logs)

snic_2_read_thpt:
	$(call bench, \
	${tem_read_thpt}, \
	${thpt_lat_results}/snic_2_read_thpt, \
	${snic_2_args}, \
	${thpt_lat_results}/snic_2_read_thpt/logs)
snic_2_read_lat:
	$(call bench, \
	${tem_read_lat}, \
	${thpt_lat_results}/snic_2_read_lat, \
	${snic_2_lat_args}, \
	${thpt_lat_results}/snic_2_read_lat/logs)

snic_3_read_thpt:
	$(call bench, \
	${tem_read_thpt}, \
	${thpt_lat_results}/snic_3_read_thpt, \
	${snic_3_args}, \
	${thpt_lat_results}/snic_3_read_thpt/logs)
# NOTE(review): the snic_3 *latency* targets reuse ${snic_3_args} — no
# snic_3-specific latency machine set is defined. Confirm this is intended.
snic_3_read_lat:
	$(call bench, \
	${tem_read_lat}, \
	${thpt_lat_results}/snic_3_read_lat, \
	${snic_3_args}, \
	${thpt_lat_results}/snic_3_read_lat/logs)

snic_1_2_read_thpt:
	$(call bench, \
	${tem_1_2_read_thpt}, \
	${thpt_lat_results}/snic_1_2_read_thpt, \
	${snic_1_2_args}, \
	${thpt_lat_results}/snic_1_2_read_thpt/logs)

snic_1_3_read_thpt:
	$(call bench, \
	${tem_1_3_read_thpt}, \
	${thpt_lat_results}/snic_1_3_read_thpt, \
	${snic_1_3_args}, \
	${thpt_lat_results}/snic_1_3_read_thpt/logs)

# ---- WRITE experiments -----------------------------------------------------
rdma_write_thpt:
	$(call bench, \
	${tem_write_thpt}, \
	${thpt_lat_results}/rdma_write_thpt, \
	${rdma_args}, \
	${thpt_lat_results}/rdma_write_thpt/logs)
rdma_write_lat:
	$(call bench, \
	${tem_write_lat}, \
	${thpt_lat_results}/rdma_write_lat, \
	${rdma_lat_args}, \
	${thpt_lat_results}/rdma_write_lat/logs)

snic_1_write_thpt:
	$(call bench, \
	${tem_write_thpt}, \
	${thpt_lat_results}/snic_1_write_thpt, \
	${snic_1_args}, \
	${thpt_lat_results}/snic_1_write_thpt/logs)
snic_1_write_lat:
	$(call bench, \
	${tem_write_lat}, \
	${thpt_lat_results}/snic_1_write_lat, \
	${snic_1_lat_args}, \
	${thpt_lat_results}/snic_1_write_lat/logs)

snic_2_write_thpt:
	$(call bench, \
	${tem_write_thpt}, \
	${thpt_lat_results}/snic_2_write_thpt, \
	${snic_2_args}, \
	${thpt_lat_results}/snic_2_write_thpt/logs)
snic_2_write_lat:
	$(call bench, \
	${tem_write_lat}, \
	${thpt_lat_results}/snic_2_write_lat, \
	${snic_2_lat_args}, \
	${thpt_lat_results}/snic_2_write_lat/logs)

snic_3_write_thpt:
	$(call bench, \
	${tem_write_thpt}, \
	${thpt_lat_results}/snic_3_write_thpt, \
	${snic_3_args}, \
	${thpt_lat_results}/snic_3_write_thpt/logs)
snic_3_write_lat:
	$(call bench, \
	${tem_write_lat}, \
	${thpt_lat_results}/snic_3_write_lat, \
	${snic_3_args}, \
	${thpt_lat_results}/snic_3_write_lat/logs)

snic_1_2_write_thpt:
	$(call bench, \
	${tem_1_2_write_thpt}, \
	${thpt_lat_results}/snic_1_2_write_thpt, \
	${snic_1_2_args}, \
	${thpt_lat_results}/snic_1_2_write_thpt/logs)

snic_1_3_write_thpt:
	$(call bench, \
	${tem_1_3_write_thpt}, \
	${thpt_lat_results}/snic_1_3_write_thpt, \
	${snic_1_3_args}, \
	${thpt_lat_results}/snic_1_3_write_thpt/logs)

# ---- RPC experiments -------------------------------------------------------
rdma_rpc_thpt:
	$(call bench, \
	${tem_rpc_thpt}, \
	${thpt_lat_results}/rdma_rpc_thpt, \
	${rdma_args}, \
	${thpt_lat_results}/rdma_rpc_thpt/logs)
rdma_rpc_lat:
	$(call bench, \
	${tem_rpc_lat}, \
	${thpt_lat_results}/rdma_rpc_lat, \
	${rdma_lat_args}, \
	${thpt_lat_results}/rdma_rpc_lat/logs)

snic_1_rpc_thpt:
	$(call bench, \
	${tem_rpc_thpt}, \
	${thpt_lat_results}/snic_1_rpc_thpt, \
	${snic_1_args}, \
	${thpt_lat_results}/snic_1_rpc_thpt/logs)
snic_1_rpc_lat:
	$(call bench, \
	${tem_rpc_lat}, \
	${thpt_lat_results}/snic_1_rpc_lat, \
	${snic_1_lat_args}, \
	${thpt_lat_results}/snic_1_rpc_lat/logs)

snic_2_rpc_thpt:
	$(call bench, \
	${tem_rpc_thpt}, \
	${thpt_lat_results}/snic_2_rpc_thpt, \
	${snic_2_args}, \
	${thpt_lat_results}/snic_2_rpc_thpt/logs)
snic_2_rpc_lat:
	$(call bench, \
	${tem_rpc_lat}, \
	${thpt_lat_results}/snic_2_rpc_lat, \
	${snic_2_lat_args}, \
	${thpt_lat_results}/snic_2_rpc_lat/logs)

snic_3_rpc_thpt:
	$(call bench, \
	${tem_rpc_thpt}, \
	${thpt_lat_results}/snic_3_rpc_thpt, \
	${snic_3_args}, \
	${thpt_lat_results}/snic_3_rpc_thpt/logs)
# NOTE(review): snic_3_rpc_lat reuses ${snic_3_args} (no lat-specific
# machine set for the intra-node setup) — confirm intended.
snic_3_rpc_lat:
	$(call bench, \
	${tem_rpc_lat}, \
	${thpt_lat_results}/snic_3_rpc_lat, \
	${snic_3_args}, \
	${thpt_lat_results}/snic_3_rpc_lat/logs)

snic_1_2_rpc_thpt:
	$(call bench, \
	${tem_1_2_rpc_thpt}, \
	${thpt_lat_results}/snic_1_2_rpc_thpt, \
	${snic_1_2_args}, \
	${thpt_lat_results}/snic_1_2_rpc_thpt/logs)

snic_1_3_rpc_thpt:
	$(call bench, \
	${tem_1_3_rpc_thpt}, \
	${thpt_lat_results}/snic_1_3_rpc_thpt, \
	${snic_1_3_args}, \
	${thpt_lat_results}/snic_1_3_rpc_thpt/logs)
--------------------------------------------------------------------------------
/benchs/two_sided_rdma/src/bootstrap/client_construct.rs:
--------------------------------------------------------------------------------
// Client-side routines of the two-sided (UD send/recv) RDMA benchmark.
//
// NOTE(review): the generic parameter lists of the three functions below were
// destroyed when this file was extracted ("Arc>", "Arc,", and the fn names
// lost their "<T>"): each routine is generic over some `T` (see the
// `where T: ...` clauses). Restore the exact `Arc<...>` types from version
// control before building; the code tokens are otherwise reproduced verbatim.
use std::sync::{ Arc };
use std::borrow::Borrow;
use bench_util::*;
use bench_util::args::*;
use bench_util::doorbell::{ UdDoorbellHelper, RecvDoorbellHelper };
use bench_util::ud_message::*;
use bench_util::ud_endpoint::*;

// rdtsc-based timing is x86-only; compiled out on the ARM (SoC) build.
#[cfg(not(feature = "ARM"))]
use bench_util::rdtsc::*;

use netbencher_core::*;

use KRdmaKit::rdma_shim::bindings::*;
use KRdmaKit::{ QueuePair, DatagramEndpoint, MemoryRegion };

use log::*;

// Throughput/latency client loop: every iteration posts a batch of UD
// SEND_WITH_IMM requests to `server_ep` over `client_qp`, then drains the
// recv CQ until every reply for the batch has arrived, accounting replies in
// `stat`. Runs until `runner.running()` turns false.
//
// NOTE(review): "succeeed" in the send expect-message is a typo in the
// original (left untouched here — a doc-only pass must not edit strings).
pub fn perform_client_routine(
    thread_id: usize,   // tagged into the imm value so the server knows the sender
    runner: Arc>,       // cooperative stop flag, polled via runner.running()
    mut stat: Arc,      // per-thread statistics sink (mutated via get_mut_unchecked)
    client_qp: Arc,     // this thread's UD queue pair
    server_ep: Arc,     // datagram endpoint addressing the server
    args: CmdlineArgs
)
    where T: Send + 'static + Sync + Copy
{
    let ctx = client_qp.ctx();
    // One slab of MAX_FLYING_MSG slots of MAX_MSG_SZ bytes each, shared by
    // sends and recvs; get_start_addr() hands out slot addresses (presumably
    // round-robin — confirm in bench_util::ud_message).
    let mut ud_buffer = UdBuffer::new(MAX_FLYING_MSG, MAX_MSG_SZ);
    let region_size = ud_buffer.get_region_size();
    // Hugepage-backed MRs when requested on the command line; note the second
    // hugepage expect-message says "send buffer" for the recv MR (copy-paste
    // in the original, kept verbatim).
    let (send_mr, recv_mr) = if args.huge_page {
        (
            MemoryRegion::new_huge_page(ctx.clone(), region_size as _).expect(
                "Failed to allocate hugepage MR for send buffer"
            ),
            MemoryRegion::new_huge_page(ctx.clone(), region_size as _).expect(
                "Failed to allocate hugepage MR for send buffer"
            ),
        )
    } else {
        (
            MemoryRegion::new(ctx.clone(), region_size as _).expect(
                "Failed to allocate MR for send buffer"
            ),
            MemoryRegion::new(ctx.clone(), region_size as _).expect(
                "Fail to allocate MR for recv buffer"
            ),
        )
    };

    // Pre-post MAX_FLYING_MSG receive buffers so replies can land immediately.
    let mut recv_doorbell = RecvDoorbellHelper::create(MAX_RECV_NUM, client_qp.clone());
    for wr_id in 0..MAX_FLYING_MSG {
        let start = ud_buffer.get_start_addr();
        recv_doorbell
            .post_recv(&recv_mr, start..start + MAX_MSG_SZ, wr_id)
            .expect("recv should succ");
    }

    let mut completions = [Default::default(); MAX_FLYING_MSG as usize];
    // `pending` counts sends since the last signaled completion was reaped;
    // only the first send of each window is posted signaled (signal == true
    // exactly when pending == 0), amortizing send-CQ polling.
    let mut pending: usize = 0;
    // Latency mode signals every send; throughput mode every args.signal_size.
    let batch_or_not = if args.latency_test { 1 } else { args.signal_size };
    // encode client message to imm so that server can know who to reply to
    let imm_data = encode_id(args.client_id as _, thread_id as _);
    let payload = align_to_cacheline(args.payload);
    // each loop send args.factor UD msgs and wait for their replies
    while runner.running() {
        std::sync::atomic::compiler_fence(std::sync::atomic::Ordering::Release);
        let req_batch = if args.latency_test { 1 } else { args.factor };
        for i in 0..req_batch {
            let signal = pending == 0;
            let start = ud_buffer.get_start_addr();

            client_qp
                .post_datagram_w_imm(
                    server_ep.borrow(),
                    &send_mr,
                    start..start + payload,
                    i,              // wr_id
                    imm_data,
                    signal
                )
                .expect("send should succeeed");
            pending += 1;
            // Replenish one recv slot per send to keep the RQ full.
            recv_doorbell
                .post_recv(&recv_mr, start..start + MAX_MSG_SZ, i)
                .expect("recv should succ");
            if pending >= batch_or_not {
                // Busy-poll the send CQ for the one signaled completion of
                // this window (shadows the outer `completions` with a 1-slot
                // array on purpose).
                let mut ok = false;
                let mut completions = [Default::default()];
                while !ok {
                    let ret = client_qp.poll_send_cq(&mut completions).expect("Failed to poll cq");
                    if ret.len() > 0 {
                        if ret[0].status != 0 {
                            panic!("cq status: {}", ret[0].status);
                        }
                        ok = true;
                    }
                }
                pending = 0;
            }
        }

        // Drain the recv CQ until all req_batch replies have arrived; a
        // negative remainder means we consumed a reply meant for someone else.
        let mut remaining = req_batch as i64;
        while remaining > 0 && runner.running() {
            let recv = client_qp.poll_recv_cq(&mut completions).unwrap();
            let recv_msg_num = recv.len() as u64;
            remaining -= recv_msg_num as i64;
            if remaining < 0 {
                panic!(
                    "Wrong in your programming, reply to an false client. Num of additional message: {}",
                    -remaining
                );
            }
            // SAFETY(review): Arc::get_mut_unchecked (nightly) bypasses the
            // refcount check; sound only if this thread is the sole mutator
            // of `stat` — confirm against the reporter's aggregation side.
            unsafe {
                Arc::get_mut_unchecked(&mut stat).finished_batch_ops(recv_msg_num);
            }
        }
    }
}

// Same loop as perform_client_routine above, additionally sampling rdtsc
// around each post_datagram_w_imm call and feeding the cycle delta into
// stat.record_avg_rdtsc (x86 builds only; compiled out under feature "ARM").
pub fn perform_client_profile_routine(
    thread_id: usize,
    runner: Arc>,
    mut stat: Arc,
    client_qp: Arc,
    server_ep: Arc,
    args: CmdlineArgs
)
    where T: Send + 'static + Sync + Copy
{
    let ctx = client_qp.ctx();
    let mut ud_buffer = UdBuffer::new(MAX_FLYING_MSG, MAX_MSG_SZ);
    let region_size = ud_buffer.get_region_size();
    // MR setup identical to perform_client_routine.
    let (send_mr, recv_mr) = if args.huge_page {
        (
            MemoryRegion::new_huge_page(ctx.clone(), region_size as _).expect(
                "Failed to allocate hugepage MR for send buffer"
            ),
            MemoryRegion::new_huge_page(ctx.clone(), region_size as _).expect(
                "Failed to allocate hugepage MR for send buffer"
            ),
        )
    } else {
        (
            MemoryRegion::new(ctx.clone(), region_size as _).expect(
                "Failed to allocate MR for send buffer"
            ),
            MemoryRegion::new(ctx.clone(), region_size as _).expect(
                "Fail to allocate MR for recv buffer"
            ),
        )
    };

    let mut recv_doorbell = RecvDoorbellHelper::create(MAX_RECV_NUM, client_qp.clone());
    for wr_id in 0..MAX_FLYING_MSG {
        let start = ud_buffer.get_start_addr();
        recv_doorbell
            .post_recv(&recv_mr, start..start + MAX_MSG_SZ, wr_id)
            .expect("recv should succ");
    }

    let mut completions = [Default::default(); MAX_FLYING_MSG as usize];
    let mut pending: usize = 0;
    let batch_or_not = if args.latency_test { 1 } else { args.signal_size };
    // encode client message to imm so that server can know who to reply to
    let imm_data = encode_id(args.client_id as _, thread_id as _);
    let payload = align_to_cacheline(args.payload);
    // each loop send args.factor UD msgs and wait for their replies
    while runner.running() {
        std::sync::atomic::compiler_fence(std::sync::atomic::Ordering::Release);
        let req_batch = if args.latency_test { 1 } else { args.factor };
        for i in 0..req_batch {
            let signal = pending == 0;
            let start = ud_buffer.get_start_addr();
            // Timestamp immediately before posting the work request.
            #[cfg(not(feature = "ARM"))]
            let begin_ts = get_rdtsc();

            client_qp
                .post_datagram_w_imm(
                    server_ep.borrow(),
                    &send_mr,
                    start..start + payload,
                    i,
                    imm_data,
                    signal
                )
                .expect("send should succeeed");
            // Record the post-side cost (cycles spent inside post_datagram_w_imm),
            // not the round-trip latency.
            #[cfg(not(feature = "ARM"))]
            {
                let end_ts = get_rdtsc();
                unsafe {
                    Arc::get_mut_unchecked(&mut stat).record_avg_rdtsc((end_ts - begin_ts).try_into().unwrap());
                }
            }

            pending += 1;
            recv_doorbell
                .post_recv(&recv_mr, start..start + MAX_MSG_SZ, i)
                .expect("recv should succ");
            if pending >= batch_or_not {
                let mut ok = false;
                let mut completions = [Default::default()];
                while !ok {
                    let ret = client_qp.poll_send_cq(&mut completions).expect("Failed to poll cq");
                    if ret.len() > 0 {
                        if ret[0].status != 0 {
                            panic!("cq status: {}", ret[0].status);
                        }
                        ok = true;
                    }
                }
                pending = 0;
            }
        }

        let mut remaining = req_batch as i64;
        while remaining > 0 && runner.running() {
            let recv = client_qp.poll_recv_cq(&mut completions).unwrap();
            let recv_msg_num = recv.len() as u64;
            remaining -= recv_msg_num as i64;
            if remaining < 0 {
                panic!(
                    "Wrong in your programming, reply to an false client. Num of additional message: {}",
                    -remaining
                );
            }
            unsafe {
                Arc::get_mut_unchecked(&mut stat).finished_batch_ops(recv_msg_num);
            }
        }
    }
}

// Same loop as perform_client_routine above, but sends go through a
// UdDoorbellHelper that batches up to args.db_size SEND_WITH_IMM work
// requests per doorbell ring instead of posting one WR at a time.
pub fn perform_client_doorbell_routine(
    thread_id: usize,
    runner: Arc>,
    mut stat: Arc,
    client_qp: Arc,
    server_ep: Arc,
    args: CmdlineArgs
)
    where T: Send + 'static + Sync + Copy
{
    let ctx = client_qp.ctx();
    let mut ud_buffer = UdBuffer::new(MAX_FLYING_MSG, MAX_MSG_SZ);
    let region_size = ud_buffer.get_region_size();
    // MR setup identical to perform_client_routine.
    let (send_mr, recv_mr) = if args.huge_page {
        (
            MemoryRegion::new_huge_page(ctx.clone(), region_size as _).expect(
                "Failed to allocate hugepage MR for send buffer"
            ),
            MemoryRegion::new_huge_page(ctx.clone(), region_size as _).expect(
                "Failed to allocate hugepage MR for send buffer"
            ),
        )
    } else {
        (
            MemoryRegion::new(ctx.clone(), region_size as _).expect(
                "Failed to allocate MR for send buffer"
            ),
            MemoryRegion::new(ctx.clone(), region_size as _).expect(
                "Fail to allocate MR for recv buffer"
            ),
        )
    };
    // Doorbell batcher for the send side: args.db_size WRs per ring, opcode
    // fixed to SEND_WITH_IMM so the server still receives the imm tag.
    let mut ud_doorbell = UdDoorbellHelper::create(
        args.db_size,
        ibv_wr_opcode::IBV_WR_SEND_WITH_IMM,
        client_qp.clone()
    );
    let mut recv_doorbell = RecvDoorbellHelper::create(MAX_RECV_NUM, client_qp.clone());
    for wr_id in 0..MAX_FLYING_MSG {
        let start = ud_buffer.get_start_addr();
        recv_doorbell
            .post_recv(&recv_mr, start..start + MAX_MSG_SZ, wr_id)
            .expect("recv should succ");
    }

    let mut completions = [Default::default(); MAX_FLYING_MSG as usize];
    let mut pending: usize = 0;
    let batch_or_not = if args.latency_test { 1 } else { args.signal_size };
    // encode client message to imm so that server can know who to reply to
    let imm_data = encode_id(args.client_id as _, thread_id as _);
    let payload = align_to_cacheline(args.payload);
    // each loop send args.factor UD msgs and wait for their replies
    while runner.running() {
        std::sync::atomic::compiler_fence(std::sync::atomic::Ordering::Release);
        let req_batch = if args.latency_test { 1 } else { args.factor };
        for i in 0..req_batch {
            let signal = pending == 0;
            let start = ud_buffer.get_start_addr();

            // Queued into the doorbell batch; actually rung by the helper
            // (presumably when the batch fills — confirm in
            // bench_util::doorbell::ud_doorbell).
            ud_doorbell
                .post_send(
                    server_ep.borrow(),
                    &send_mr,
                    start..start + payload,
                    i,
                    Some(imm_data),
                    signal
                )
                .expect("send should succeeed");
            pending += 1;
            recv_doorbell
                .post_recv(&recv_mr, start..start + MAX_MSG_SZ, i)
                .expect("recv should succ");
            if pending >= batch_or_not {
                let mut ok = false;
                let mut completions = [Default::default()];
                while !ok {
                    let ret = client_qp.poll_send_cq(&mut completions).expect("Failed to poll cq");
                    if ret.len() > 0 {
                        if ret[0].status != 0 {
                            panic!("cq status: {}", ret[0].status);
                        }
                        ok = true;
                    }
                }
                pending = 0;
            }
        }

        let mut remaining = req_batch as i64;
        while remaining > 0 && runner.running() {
            let recv = client_qp.poll_recv_cq(&mut completions).unwrap();
            let recv_msg_num = recv.len() as u64;
            remaining -= recv_msg_num as i64;
            if remaining < 0 {
                panic!(
                    "Wrong in your programming, reply to an false client. Num of additional message: {}",
                    -remaining
                );
            }
            unsafe {
                Arc::get_mut_unchecked(&mut stat).finished_batch_ops(recv_msg_num);
            }
        }
    }
}
--------------------------------------------------------------------------------