├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── antagonist.config ├── apps ├── bench │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── RpcManager.cc │ ├── RpcManager.h │ ├── callibrate.cc │ ├── efficiency.cc │ ├── efficiency_go.go │ ├── efficiency_linux.cc │ ├── fake_worker.cc │ ├── fake_worker.h │ ├── flash_client.cc │ ├── linux_mech_bench.cc │ ├── memcached_router.cc │ ├── netbench.cc │ ├── netbench2.cc │ ├── netbench_linux.cc │ ├── netbench_udp.cc │ ├── netperf.cc │ ├── proto.h │ ├── storage_bench.cc │ ├── stress.cc │ ├── stress.config │ ├── stress_linux.cpp │ ├── tbench.cc │ ├── tbench.config │ └── waking.config ├── dpdk_netperf │ ├── .gitignore │ ├── Makefile │ ├── README.md │ └── dpdk_netperf.c ├── netbench │ ├── .gitignore │ ├── Makefile │ ├── distribution.cc │ ├── distribution.h │ ├── format.sh │ ├── interference.cc │ ├── netbench.cc │ ├── stress.cc │ ├── stress_linux.cc │ ├── stress_shm.cc │ ├── stress_shm_query.cc │ ├── synthetic_worker.cc │ ├── synthetic_worker.h │ ├── util.cc │ └── util.h ├── spdk_perf │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── perf.c │ └── run_perf.sh ├── storage_service │ ├── .gitignore │ ├── Makefile │ ├── reflex.h │ ├── snappy.sh │ └── storage_server.cc ├── stream │ ├── .gitignore │ ├── Makefile │ ├── stream.cc │ └── stream_query.cc ├── streamcluster │ ├── COPYRIGHT │ ├── Makefile │ ├── parsec_barrier.cpp │ ├── parsec_barrier.hpp │ └── streamcluster.cpp └── synthetic │ ├── .gitignore │ ├── Cargo.toml │ ├── build.rs │ ├── rust-toolchain.toml │ └── src │ ├── backend.rs │ ├── distribution.rs │ ├── dns.rs │ ├── fakework.rs │ ├── http.rs │ ├── lockstep.rs │ ├── main.rs │ ├── memcached.rs │ ├── payload.rs │ └── reflex.rs ├── base ├── base.ld ├── bitmap.c ├── cpu.c ├── fd_transfer.c ├── init.c ├── init_internal.h ├── jenkins_hash.c ├── list.c ├── log.c ├── lrpc.c ├── mem.c ├── mempool.c ├── page.c ├── pci.c ├── signal.c ├── slab.c ├── stat.c ├── syscall.S ├── sysfs.c ├── tcache.c 
├── thread.c └── time.c ├── bindings ├── cc │ ├── .gitignore │ ├── Makefile │ ├── net.cc │ ├── net.h │ ├── runtime.cc │ ├── runtime.h │ ├── storage.h │ ├── sync.h │ ├── test.cc │ ├── thread.cc │ ├── thread.h │ └── timer.h └── rust │ ├── .gitignore │ ├── Cargo.lock │ ├── Cargo.toml │ ├── build.rs │ ├── rust-toolchain.toml │ ├── shenango.h │ └── src │ ├── asm.rs │ ├── ffi.rs │ ├── lib.rs │ ├── storage.rs │ ├── tcp.rs │ ├── test_hello.rs │ ├── test_runtime_joinhandle.rs │ ├── test_runtime_threads.rs │ ├── test_smalloc.rs │ ├── thread.rs │ └── udp.rs ├── breakwater ├── Makefile ├── README.md ├── apps │ └── netbench │ │ ├── Makefile │ │ ├── client.config │ │ ├── netbench.cc │ │ ├── server.config │ │ ├── synthetic_worker.cc │ │ ├── synthetic_worker.h │ │ ├── util.cc │ │ └── util.h ├── bindings │ └── cc │ │ ├── Makefile │ │ ├── inc │ │ └── breakwater │ │ │ └── rpc++.h │ │ └── rpc++.cc ├── inc │ └── breakwater │ │ ├── breakwater.h │ │ ├── dagor.h │ │ ├── nocontrol.h │ │ ├── rpc.h │ │ └── seda.h ├── scripts │ └── setup_machine.sh └── src │ ├── bw_client.c │ ├── bw_config.h │ ├── bw_proto.h │ ├── bw_server.c │ ├── dg_client.c │ ├── dg_config.h │ ├── dg_proto.h │ ├── dg_server.c │ ├── nc_client.c │ ├── nc_config.h │ ├── nc_proto.h │ ├── nc_server.c │ ├── sd_client.c │ ├── sd_config.h │ ├── sd_proto.h │ ├── sd_server.c │ ├── util.c │ └── util.h ├── build ├── config ├── init_submodules.sh ├── mlx4_22_03.patch ├── mlx5_22_03.patch ├── patches │ ├── dpdk │ │ ├── 0001-config-extend-max-memseg-lists.patch │ │ ├── 0002-i40e-disable-itr.patch │ │ └── 0003-ixgbe-performance-tuning.patch │ └── rdma-core │ │ ├── 0001-fast-runtime-flow-steering.patch │ │ ├── 0002-vfio-directpath-driver-support.patch │ │ ├── 0003-enable-fast-flow-steering-in-vfio-mode.patch │ │ ├── 0004-expose-object-id.patch │ │ └── 0005-increase-max-number-of-qps-cqs.patch ├── pcm.patch ├── shared.mk ├── spdk.patch └── spdk2.patch ├── client.config ├── inc ├── asm │ ├── atomic.h │ ├── chksum.h │ ├── cpu.h │ └── ops.h 
├── base │ ├── assert.h │ ├── atomic.h │ ├── bitmap.h │ ├── byteorder.h │ ├── compiler.h │ ├── cpu.h │ ├── fd_transfer.h │ ├── gen.h │ ├── hash.h │ ├── init.h │ ├── kref.h │ ├── limits.h │ ├── list.h │ ├── lock.h │ ├── log.h │ ├── lrpc.h │ ├── mem.h │ ├── mempool.h │ ├── page.h │ ├── pci.h │ ├── signal.h │ ├── slab.h │ ├── stat.h │ ├── stddef.h │ ├── syscall.h │ ├── sysfs.h │ ├── tcache.h │ ├── thread.h │ ├── time.h │ ├── trapframe.h │ └── types.h ├── iokernel │ ├── control.h │ ├── directpath.h │ ├── queue.h │ └── shm.h ├── net │ ├── arp.h │ ├── chksum.h │ ├── ethernet.h │ ├── icmp.h │ ├── ip.h │ ├── mbuf.h │ ├── mbufq.h │ ├── ping.h │ ├── tcp.h │ └── udp.h └── runtime │ ├── gc.h │ ├── net.h │ ├── poll.h │ ├── preempt.h │ ├── rcu.h │ ├── rculist.h │ ├── runtime.h │ ├── smalloc.h │ ├── storage.h │ ├── sync.h │ ├── tcp.h │ ├── thread.h │ ├── timer.h │ └── udp.h ├── iokernel ├── commands.c ├── control.c ├── defs.h ├── directpath │ ├── arp_fwd.c │ ├── command.c │ ├── core.c │ ├── defs.h │ ├── events.c │ ├── mlx5_ifc.h │ ├── queues.c │ └── steering.c ├── dma.c ├── dp_clients.c ├── dpdk.c ├── hw_timestamp.c ├── hw_timestamp.h ├── ias.c ├── ias.h ├── ias_bw.c ├── ias_ht.c ├── ias_ts.c ├── ksched.c ├── ksched.h ├── main.c ├── mempool_completion.c ├── numa.c ├── pcm.h ├── pmc.h ├── ref.h ├── rx.c ├── sched.c ├── sched.h ├── simple.c ├── stat.c ├── timer_wheel.c └── tx.c ├── ksched ├── .gitignore ├── Kbuild ├── Makefile ├── defs.h ├── fake_idle.c ├── ksched.h ├── ksched_main.c ├── uintr.c ├── uintr.h └── uintr_hw.h ├── net └── netdump.c ├── runtime ├── cfg.c ├── defs.h ├── gc.c ├── init.c ├── ioqueues.c ├── kthread.c ├── net │ ├── arp.c │ ├── core.c │ ├── defs.h │ ├── directpath │ │ ├── common.c │ │ ├── defs.h │ │ └── mlx5 │ │ │ ├── mlx5.h │ │ │ ├── mlx5_flow_steering.c │ │ │ ├── mlx5_ifc.h │ │ │ ├── mlx5_init_common.c │ │ │ ├── mlx5_init_external.c │ │ │ ├── mlx5_init_verbs.c │ │ │ ├── mlx5_queue_steering.c │ │ │ ├── mlx5_rx_stride.c │ │ │ └── mlx5_rxtx.c │ ├── icmp.c │ ├── 
ping.c │ ├── tcp.c │ ├── tcp.h │ ├── tcp_debug.c │ ├── tcp_in.c │ ├── tcp_out.c │ ├── transport.c │ ├── udp.c │ └── waitq.h ├── poll.c ├── preempt.c ├── rcu.c ├── sched.c ├── smalloc.c ├── softirq.c ├── stack.c ├── stat.c ├── storage.c ├── switch.S ├── sync.c ├── timer.c └── uintr.S ├── sample.config ├── scripts ├── count_loc.sh ├── cstate.c ├── rstat.go ├── set_irq_affinity ├── setup_machine.sh ├── setup_vfs.sh └── spin.cc ├── server.config ├── shim ├── Makefile ├── README ├── common.h ├── entry.c ├── mem.c ├── pthread.c ├── sem.c ├── sleep.c ├── sync.c └── tls.c ├── tests ├── .gitignore ├── netperf.c ├── test_base_gen.c ├── test_base_hello.c ├── test_base_lrpc.c ├── test_base_thread.c ├── test_kthread_attach.c ├── test_kthread_wakeup.c ├── test_many_threads.c ├── test_multiple_runtimes.c ├── test_ping.c ├── test_runtime_mutexes.c ├── test_runtime_rcu.c ├── test_runtime_smalloc.c ├── test_runtime_threads.c ├── test_runtime_timer.c ├── test_storage.c └── test_storage_iops.c └── victim.config /.gitignore: -------------------------------------------------------------------------------- 1 | *.d 2 | *.o 3 | *.a 4 | [._]*.sw[a-p] 5 | iokerneld 6 | *~ 7 | .cproject 8 | .project 9 | ._* 10 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dpdk"] 2 | path = dpdk 3 | url = https://github.com/DPDK/dpdk 4 | [submodule "spdk"] 5 | path = spdk 6 | url = https://github.com/spdk/spdk 7 | [submodule "rdma-core"] 8 | path = rdma-core 9 | url = https://github.com/linux-rdma/rdma-core 10 | [submodule "apps/storage_service/snappy"] 11 | path = apps/storage_service/snappy 12 | url = https://github.com/google/snappy.git 13 | [submodule "deps/pcm"] 14 | path = deps/pcm 15 | url = https://github.com/opcm/pcm.git 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: 
-------------------------------------------------------------------------------- 1 | ## Code Overview 2 | 3 | apps - synthetic and benchmarking applications. 4 | 5 | base - an extension to the standard C library that provides tools for managing 6 | lists, memory, bitmaps, initialization, atomics, and several other useful 7 | features. 8 | 9 | bindings - language bindings (C++ and Rust) for the runtime. 10 | 11 | dpdk - [DPDK](https://www.dpdk.org/) library for accessing NIC queues 12 | from userspace. 13 | 14 | iokernel - dedicated core that steers packets and reallocates cores 15 | across applications. 16 | 17 | net - a packet manipulation library. 18 | 19 | runtime - a user-level threading and networking runtime. 20 | 21 | shim - a shim layer that enables running unmodified 22 | [PARSEC](http://parsec.cs.princeton.edu/) applications atop Shenango. 23 | 24 | 25 | ## Coding Style 26 | 27 | Use the following conventions for C code: 28 | https://www.kernel.org/doc/html/v4.10/process/coding-style.html 29 | 30 | Use the following conventions for C++ code: 31 | https://google.github.io/styleguide/cppguide.html 32 | 33 | For third-party libraries and tools, use their existing coding style. 
34 | 35 | For some helpful tips on how to write clean code, see: 36 | https://www.lysator.liu.se/c/pikestyle.html 37 | -------------------------------------------------------------------------------- /antagonist.config: -------------------------------------------------------------------------------- 1 | # an example runtime config file 2 | host_addr 192.168.1.9 3 | host_netmask 255.255.255.0 4 | host_gateway 192.168.1.1 5 | runtime_kthreads 20 6 | runtime_guaranteed_kthreads 0 7 | runtime_priority be 8 | runtime_qdelay_us 10 9 | -------------------------------------------------------------------------------- /apps/bench/.gitignore: -------------------------------------------------------------------------------- 1 | callibrate 2 | efficiency 3 | efficiency_linux 4 | stress 5 | stress_linux 6 | tbench 7 | netbench 8 | netbench2 9 | netbench_udp 10 | netbench_linux 11 | netperf 12 | linux_mech_bench 13 | memcached_router 14 | flash_client 15 | storage_bench 16 | -------------------------------------------------------------------------------- /apps/bench/README.md: -------------------------------------------------------------------------------- 1 | # Threading Benchmarks 2 | 3 | First build Shenango and then build the benchmarks in this directory 4 | with `make clean && make`. Run the main Shenango threading benchmarks 5 | as follows (benchmarks will use a single runtime core). 
6 | 7 | In shenango directory: 8 | ``` 9 | sudo ./iokerneld 10 | ``` 11 | 12 | In this directory: 13 | ``` 14 | ./tbench tbench.config 15 | ``` -------------------------------------------------------------------------------- /apps/bench/callibrate.cc: -------------------------------------------------------------------------------- 1 | #include "runtime.h" 2 | #include "fake_worker.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace { 9 | 10 | using us = std::chrono::duration; 11 | constexpr int kMultiply = 100000; 12 | 13 | void Measure(FakeWorker *w, double target) { 14 | double elapsed; 15 | uint64_t i = 1; 16 | 17 | do { 18 | i *= 2; 19 | auto start = std::chrono::steady_clock::now(); 20 | for (int j = 0; j < kMultiply; j++) w->Work(i); 21 | auto finish = std::chrono::steady_clock::now(); 22 | elapsed = std::chrono::duration_cast(finish - start).count(); 23 | } while (elapsed < target * kMultiply); 24 | 25 | while (elapsed > target * kMultiply) { 26 | --i; 27 | auto start = std::chrono::steady_clock::now(); 28 | for (int j = 0; j < kMultiply; j++) w->Work(i); 29 | auto finish = std::chrono::steady_clock::now(); 30 | elapsed = std::chrono::duration_cast(finish - start).count(); 31 | } 32 | 33 | std::cout << i << " iterations took " << elapsed / kMultiply << " us." 34 | << std::endl; 35 | } 36 | 37 | } // anonymous namespace 38 | 39 | int main(int argc, char *argv[]) { 40 | if (argc != 3) { 41 | std::cerr << "usage: [microseconds (double)] [worker_spec]" << std::endl; 42 | return 1; 43 | } 44 | 45 | FakeWorker *w = FakeWorkerFactory(argv[2]); 46 | if (!w) { 47 | std::cerr << "Invalid worker argument." 
<< std::endl; 48 | return 1; 49 | } 50 | Measure(w, std::stod(argv[1], nullptr)); 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /apps/bench/fake_worker.h: -------------------------------------------------------------------------------- 1 | // fake_worker.h - support for carefully controlled fake work generation 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | class FakeWorker { 11 | public: 12 | // Perform n iterations of fake work. 13 | virtual void Work(uint64_t n) = 0; 14 | }; 15 | 16 | class SqrtWorker : public FakeWorker { 17 | public: 18 | SqrtWorker() {} 19 | ~SqrtWorker() {} 20 | 21 | // Performs n iterations of sqrt(). 22 | void Work(uint64_t n); 23 | }; 24 | 25 | class StridedMemtouchWorker : public FakeWorker { 26 | public: 27 | ~StridedMemtouchWorker() {delete buf_;} 28 | 29 | // Creates a strided memory touching worker. 30 | static StridedMemtouchWorker *Create(std::size_t size, size_t stride); 31 | 32 | // Performs n strided memory touches. 33 | void Work(uint64_t n); 34 | 35 | private: 36 | StridedMemtouchWorker(char *buf, std::size_t size, size_t stride) : 37 | buf_(buf), size_(size), stride_(stride) { } 38 | 39 | volatile char *buf_; 40 | std::size_t size_; 41 | std::size_t stride_; 42 | }; 43 | 44 | class MemStreamWorker : public FakeWorker { 45 | public: 46 | ~MemStreamWorker(); 47 | 48 | // Creates a memory streaming worker. 49 | static MemStreamWorker *Create(std::size_t size); 50 | 51 | // Performs n memory reads. 52 | void Work(uint64_t n); 53 | 54 | private: 55 | MemStreamWorker(char *buf, std::size_t size) : 56 | buf_(buf), size_(size) { } 57 | 58 | volatile char *buf_; 59 | std::size_t size_; 60 | }; 61 | 62 | class RandomMemtouchWorker : public FakeWorker { 63 | public: 64 | ~RandomMemtouchWorker() {delete buf_;} 65 | 66 | // Creates a random memory touching worker. 
67 | static RandomMemtouchWorker *Create(std::size_t size, unsigned int seed); 68 | 69 | // Performs n random memory touches. 70 | void Work(uint64_t n); 71 | 72 | private: 73 | RandomMemtouchWorker(char *buf, std::vector schedule) : 74 | buf_(buf), schedule_(std::move(schedule)) { } 75 | 76 | volatile char *buf_; 77 | std::vector schedule_; 78 | }; 79 | 80 | // Parses a string to generate one of the above fake workers. 81 | FakeWorker *FakeWorkerFactory(std::string s); 82 | -------------------------------------------------------------------------------- /apps/bench/proto.h: -------------------------------------------------------------------------------- 1 | // a really basic encoding for experiment messages 2 | 3 | #pragma once 4 | 5 | // The netbench server responds to this port. 6 | constexpr uint64_t kNetbenchPort = 8001; 7 | 8 | constexpr uint32_t kMagic = 0x6e626368; // 'nbch' 9 | constexpr uint32_t kKill = 0x6b696c6c; // 'kill' 10 | 11 | struct nbench_req { 12 | uint32_t magic; 13 | int nports; 14 | }; 15 | 16 | struct nbench_resp { 17 | uint32_t magic; 18 | int nports; 19 | uint16_t ports[]; 20 | }; 21 | 22 | struct payload { 23 | uint32_t tag; 24 | uint64_t idx; 25 | double workn; 26 | char pad[]; 27 | }; 28 | -------------------------------------------------------------------------------- /apps/bench/stress.cc: -------------------------------------------------------------------------------- 1 | extern "C" { 2 | #include 3 | #undef min 4 | #undef max 5 | } 6 | 7 | #include "runtime.h" 8 | #include "thread.h" 9 | #include "sync.h" 10 | #include "timer.h" 11 | #include "fake_worker.h" 12 | 13 | #include 14 | #include 15 | 16 | namespace { 17 | 18 | int threads; 19 | uint64_t n; 20 | std::string worker_spec; 21 | 22 | void MainHandler(void *arg) { 23 | rt::WaitGroup wg(1); 24 | std::vector cnt(threads); 25 | 26 | for (int i = 0; i < threads; ++i) { 27 | rt::Spawn([&,i](){ 28 | auto *w = FakeWorkerFactory(worker_spec); 29 | if (w == nullptr) { 30 | std::cerr 
<< "Failed to create worker." << std::endl; 31 | exit(1); 32 | } 33 | 34 | while (true) { 35 | w->Work(n); 36 | cnt[i]++; 37 | rt::Yield(); 38 | } 39 | }); 40 | } 41 | 42 | rt::Spawn([&](){ 43 | uint64_t last_total = 0; 44 | auto last = std::chrono::steady_clock::now(); 45 | while (1) { 46 | rt::Sleep(rt::kSeconds); 47 | auto now = std::chrono::steady_clock::now(); 48 | uint64_t total = 0; 49 | double duration = std::chrono::duration_cast< 50 | std::chrono::duration>(now - last).count(); 51 | for (int i = 0; i < threads; i++) total += cnt[i]; 52 | log_info("%f", static_cast(total - last_total) / duration); 53 | last_total = total; 54 | last = now; 55 | } 56 | }); 57 | 58 | // never returns 59 | wg.Wait(); 60 | } 61 | 62 | } // anonymous namespace 63 | 64 | int main(int argc, char *argv[]) { 65 | int ret; 66 | 67 | if (argc != 5) { 68 | std::cerr << "usage: [config_file] [#threads] [#n] [worker_spec]" 69 | << std::endl; 70 | return -EINVAL; 71 | } 72 | 73 | threads = std::stoi(argv[2], nullptr, 0); 74 | n = std::stoul(argv[3], nullptr, 0); 75 | worker_spec = std::string(argv[4]); 76 | 77 | ret = runtime_init(argv[1], MainHandler, NULL); 78 | if (ret) { 79 | printf("failed to start runtime\n"); 80 | return ret; 81 | } 82 | 83 | return 0; 84 | } 85 | -------------------------------------------------------------------------------- /apps/bench/stress.config: -------------------------------------------------------------------------------- 1 | # an example runtime config file for filling all unused cores 2 | # (assuming 24 total available) with batch work 3 | host_addr 192.168.1.111 4 | host_netmask 255.255.255.0 5 | host_gateway 192.168.1.1 6 | runtime_kthreads 22 7 | -------------------------------------------------------------------------------- /apps/bench/stress_linux.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "fake_worker.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace { 9 | 10 | int 
threads; 11 | uint64_t n; 12 | std::string worker_spec; 13 | 14 | void MainHandler(void *arg) { 15 | std::vector cnt(threads); 16 | 17 | for (int i = 0; i < threads; ++i) { 18 | std::thread([i, &cnt](){ 19 | auto *w = FakeWorkerFactory(worker_spec); 20 | if (w == nullptr) { 21 | std::cerr << "Failed to create worker." << std::endl; 22 | exit(1); 23 | } 24 | 25 | while (true) { 26 | w->Work(n); 27 | cnt[i]++; 28 | } 29 | }).detach(); 30 | } 31 | 32 | std::thread([&](){ 33 | uint64_t last_total = 0; 34 | auto last = std::chrono::steady_clock::now(); 35 | while (1) { 36 | std::chrono::seconds sec(1); 37 | std::this_thread::sleep_for(sec); 38 | auto now = std::chrono::steady_clock::now(); 39 | uint64_t total = 0; 40 | double duration = std::chrono::duration_cast< 41 | std::chrono::duration>(now - last).count(); 42 | for (int i = 0; i < threads; i++) total += cnt[i]; 43 | std::cerr << static_cast(total - last_total) / duration 44 | << std::endl; 45 | last_total = total; 46 | last = now; 47 | } 48 | }).join(); 49 | 50 | // never returns 51 | } 52 | 53 | } // anonymous namespace 54 | 55 | int main(int argc, char *argv[]) { 56 | 57 | if (argc != 4) { 58 | std::cerr << "usage: [#threads] [#n] [worker_spec]" 59 | << std::endl; 60 | return -EINVAL; 61 | } 62 | 63 | threads = std::stoi(argv[1], nullptr, 0); 64 | n = std::stoul(argv[2], nullptr, 0); 65 | worker_spec = std::string(argv[3]); 66 | 67 | MainHandler(NULL); 68 | 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /apps/bench/tbench.config: -------------------------------------------------------------------------------- 1 | # an example runtime config file 2 | host_addr 192.168.1.2 3 | host_netmask 255.255.255.0 4 | host_gateway 192.168.1.1 5 | runtime_kthreads 1 6 | runtime_guaranteed_kthreads 1 7 | runtime_spinning_kthreads 1 8 | disable_watchdog 1 -------------------------------------------------------------------------------- /apps/bench/waking.config: 
-------------------------------------------------------------------------------- 1 | # an example runtime config file 2 | host_addr 192.168.1.2 3 | host_netmask 255.255.255.0 4 | host_gateway 192.168.1.1 5 | runtime_kthreads 1 6 | runtime_guaranteed_kthreads 1 7 | -------------------------------------------------------------------------------- /apps/dpdk_netperf/.gitignore: -------------------------------------------------------------------------------- 1 | build -------------------------------------------------------------------------------- /apps/dpdk_netperf/README.md: -------------------------------------------------------------------------------- 1 | # Latency Benchmarks 2 | 3 | First build DPDK (without driver modifications), then build 4 | dpdk_netperf in this directory with `make clean && make`. 5 | 6 | ## DPDK only 7 | To run the benchmark with pure DPDK on both machines: 8 | 9 | On the server (IP 192.168.1.2): 10 | ``` 11 | sudo ./build/dpdk_netperf -l2 --socket-mem=128 -- UDP_SERVER 192.168.1.2 12 | ``` 13 | 14 | On the client (IP 192.168.1.3): 15 | ``` 16 | sudo ./build/dpdk_netperf -l2 --socket-mem=128 -- UDP_CLIENT 192.168.1.3 192.168.1.2 50000 8001 10 8 50 17 | ``` 18 | 19 | ## Shenango spinning (IOKernel + runtime) 20 | 21 | To run Shenango with the server runtime thread spinning, start the 22 | IOKernel and then in `shenango/apps/bench`: 23 | 24 | ``` 25 | ./netbench_udp tbench.config server 26 | ``` 27 | Then run the client as above. 28 | 29 | ## Shenango waking (IOKernel + runtime + wakeup) 30 | 31 | To run with Shenango in its default mode but no batch work, start the 32 | IOKernel and then in `shenango/apps/bench`: 33 | ``` 34 | ./netbench_udp waking.config server 35 | ``` 36 | Then run the client as above. 
37 | 38 | ## Shenango preempting (IOKernel + runtime + wakeup + preemption) 39 | 40 | To run Shenango with a batch application running concurrently, start 41 | the IOKernel and then in `shenango/apps/bench`: 42 | ``` 43 | ./stress stress.config 100 100 sqrt 44 | ./netbench_udp waking.config server 45 | ``` 46 | 47 | Then run the client as above. If your server does not have 24 48 | hyperthreads, you will need to adjust `runtime_kthreads` in 49 | stress.config to be 2 fewer than the number of hyperthreads on your 50 | server. -------------------------------------------------------------------------------- /apps/netbench/.gitignore: -------------------------------------------------------------------------------- 1 | stress 2 | netbench 3 | interference 4 | stress_linux 5 | stress_shm 6 | stress_shm_query 7 | -------------------------------------------------------------------------------- /apps/netbench/distribution.cc: -------------------------------------------------------------------------------- 1 | // distribution.cc - support for generating random distributions 2 | 3 | #include "distribution.h" 4 | #include "util.h" 5 | 6 | #include 7 | 8 | Distribution *DistributionFactory(std::string s) { 9 | std::vector tokens = split(s, ':'); 10 | 11 | // the first token is the type of worker, must be specified 12 | auto cnt = tokens.size(); 13 | if (cnt < 1) return nullptr; 14 | 15 | if (tokens[0] == "fixed" && cnt == 2) { 16 | double val = std::stod(tokens[1], nullptr); 17 | return new FixedDistribution(val); 18 | } else if (tokens[0] == "exponential" && cnt == 2) { 19 | double val = std::stod(tokens[1], nullptr); 20 | return new ExponentialDistribution(rand(), val); 21 | } else if (tokens[0] == "bimodal" && cnt == 4) { 22 | double low = std::stod(tokens[1], nullptr); 23 | double high = std::stod(tokens[2], nullptr); 24 | double frac = std::stod(tokens[3], nullptr); 25 | return new BimodalDistribution(rand(), low, high, frac); 26 | } 27 | 28 | // invalid type of worker 29 
| return nullptr; 30 | } 31 | -------------------------------------------------------------------------------- /apps/netbench/distribution.h: -------------------------------------------------------------------------------- 1 | // distribution.h - support for generating random distributions 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | class Distribution { 8 | public: 9 | virtual ~Distribution() {} 10 | 11 | // Generate the next sample. 12 | virtual double operator()() = 0; 13 | virtual double Mean() const = 0; 14 | }; 15 | 16 | class FixedDistribution : public Distribution { 17 | public: 18 | FixedDistribution(double val) : val_(val) {} 19 | ~FixedDistribution() {} 20 | 21 | double operator()() { return val_; } 22 | double Mean() const { return val_; } 23 | 24 | private: 25 | const double val_; 26 | }; 27 | 28 | class BimodalDistribution : public Distribution { 29 | public: 30 | BimodalDistribution(int seed, double low, double high, double fraction) 31 | : frac_(fraction), low_(low), high_(high), rand_(seed), dist_(0.0, 1.0) {} 32 | 33 | double operator()() { return dist_(rand_) > frac_ ? high_ : low_; } 34 | double Mean() const { return high_ * (1 - frac_) + low_ * frac_; } 35 | 36 | private: 37 | const double frac_; 38 | const double low_; 39 | const double high_; 40 | std::mt19937 rand_; 41 | std::uniform_real_distribution dist_; 42 | }; 43 | 44 | class ExponentialDistribution : public Distribution { 45 | public: 46 | ExponentialDistribution(int seed, double mean) 47 | : mean_(mean), rand_(seed), dist_(1.0f / mean) {} 48 | ~ExponentialDistribution() {} 49 | 50 | double operator()() { return dist_(rand_); } 51 | double Mean() const { return mean_; } 52 | 53 | private: 54 | const double mean_; 55 | std::mt19937 rand_; 56 | std::exponential_distribution dist_; 57 | }; 58 | 59 | // Parses a string to generate one of the above distributions. 
60 | Distribution *DistributionFactory(std::string s); 61 | -------------------------------------------------------------------------------- /apps/netbench/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # formats all source files 3 | clang-format -i -style=google *.cc *.h 4 | -------------------------------------------------------------------------------- /apps/netbench/stress.cc: -------------------------------------------------------------------------------- 1 | extern "C" { 2 | #include 3 | } 4 | 5 | #include "runtime.h" 6 | #include "sync.h" 7 | #include "synthetic_worker.h" 8 | #include "thread.h" 9 | #include "timer.h" 10 | 11 | #include 12 | #include 13 | 14 | barrier_t barrier; 15 | 16 | bool synth_barrier_wait() { return barrier_wait(&barrier); } 17 | 18 | namespace { 19 | 20 | int threads; 21 | uint64_t n; 22 | std::string worker_spec; 23 | 24 | void MainHandler(void *arg) { 25 | rt::WaitGroup wg(1); 26 | std::vector cnt(threads); 27 | 28 | barrier_init(&barrier, threads); 29 | 30 | for (int i = 0; i < threads; ++i) { 31 | rt::Spawn([&, i]() { 32 | auto *w = SyntheticWorkerFactory(worker_spec); 33 | if (w == nullptr) { 34 | std::cerr << "Failed to create worker." 
<< std::endl; 35 | exit(1); 36 | } 37 | 38 | while (true) { 39 | w->Work(n); 40 | cnt[i]++; 41 | rt::Yield(); 42 | } 43 | }); 44 | } 45 | 46 | rt::Spawn([&]() { 47 | uint64_t last_total = 0; 48 | auto last = std::chrono::steady_clock::now(); 49 | while (1) { 50 | rt::Sleep(rt::kSeconds); 51 | auto now = std::chrono::steady_clock::now(); 52 | uint64_t total = 0; 53 | double duration = 54 | std::chrono::duration_cast>(now - last) 55 | .count(); 56 | for (int i = 0; i < threads; i++) total += cnt[i]; 57 | preempt_disable(); 58 | log_info("%f", static_cast(total - last_total) / duration); 59 | preempt_enable(); 60 | last_total = total; 61 | last = now; 62 | } 63 | }); 64 | 65 | // never returns 66 | wg.Wait(); 67 | } 68 | 69 | } // anonymous namespace 70 | 71 | int main(int argc, char *argv[]) { 72 | int ret; 73 | 74 | if (argc != 5) { 75 | std::cerr << "usage: [config_file] [#threads] [#n] [worker_spec]" 76 | << std::endl; 77 | return -EINVAL; 78 | } 79 | 80 | threads = std::stoi(argv[2], nullptr, 0); 81 | n = std::stoul(argv[3], nullptr, 0); 82 | worker_spec = std::string(argv[4]); 83 | 84 | ret = runtime_init(argv[1], MainHandler, NULL); 85 | if (ret) { 86 | printf("failed to start runtime\n"); 87 | return ret; 88 | } 89 | 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /apps/netbench/stress_shm.cc: -------------------------------------------------------------------------------- 1 | extern "C" { 2 | #include 3 | } 4 | 5 | #include "runtime.h" 6 | #include "sync.h" 7 | #include "synthetic_worker.h" 8 | #include "thread.h" 9 | #include "timer.h" 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #define SHM_KEY (0x123) 17 | 18 | barrier_t barrier; 19 | bool use_barrier = false; 20 | bool synth_barrier_wait() { return barrier_wait(&barrier); } 21 | namespace { 22 | 23 | int threads; 24 | uint64_t n; 25 | std::string worker_spec; 26 | 27 | void MainHandler(void *arg) { 28 | uint64_t *cnt; 29 
| int shmid = 30 | shmget((key_t)SHM_KEY, sizeof(uint64_t) * threads * 8, 0777 | IPC_CREAT); 31 | void *shm = NULL; 32 | shm = shmat(shmid, 0, 0); 33 | cnt = (uint64_t *)shm; 34 | 35 | rt::WaitGroup wg(1); 36 | barrier_init(&barrier, threads); 37 | 38 | for (int i = 0; i < threads; ++i) { 39 | rt::Spawn([&, i]() { 40 | auto *w = SyntheticWorkerFactory(worker_spec); 41 | if (w == nullptr) { 42 | std::cerr << "Failed to create worker." << std::endl; 43 | exit(1); 44 | } 45 | 46 | while (true) { 47 | w->Work(n); 48 | cnt[i * 8]++; 49 | if (use_barrier) 50 | synth_barrier_wait(); 51 | else 52 | rt::Yield(); 53 | } 54 | }); 55 | } 56 | 57 | // never returns 58 | wg.Wait(); 59 | } 60 | 61 | } // anonymous namespace 62 | 63 | void PrintUsage() { 64 | std::cerr 65 | << "usage: [config_file] [#threads] [#n] [worker_spec] " 66 | << std::endl; 67 | } 68 | 69 | int main(int argc, char *argv[]) { 70 | int ret; 71 | 72 | if (argc < 5) { 73 | PrintUsage(); 74 | return -EINVAL; 75 | } 76 | 77 | threads = std::stoi(argv[2], nullptr, 0); 78 | n = std::stoul(argv[3], nullptr, 0); 79 | worker_spec = std::string(argv[4]); 80 | 81 | if (argc > 5) { 82 | if (std::string(argv[5]) != "use_barrier") { 83 | PrintUsage(); 84 | return -EINVAL; 85 | } 86 | use_barrier = true; 87 | } 88 | 89 | ret = runtime_init(argv[1], MainHandler, NULL); 90 | if (ret) { 91 | printf("failed to start runtime\n"); 92 | return ret; 93 | } 94 | 95 | return 0; 96 | } 97 | -------------------------------------------------------------------------------- /apps/netbench/util.cc: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | std::vector split(const std::string &text, char sep) { 4 | std::vector tokens; 5 | std::string::size_type start = 0, end = 0; 6 | while ((end = text.find(sep, start)) != std::string::npos) { 7 | tokens.push_back(text.substr(start, end - start)); 8 | start = end + 1; 9 | } 10 | tokens.push_back(text.substr(start)); 11 | return tokens; 
12 | } 13 | -------------------------------------------------------------------------------- /apps/netbench/util.h: -------------------------------------------------------------------------------- 1 | // util.h - a collection of shared utilities 2 | 3 | #pragma once 4 | 5 | #include "timer.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std::chrono; 13 | 14 | struct work_unit { 15 | double start_us, work_us, duration_us; 16 | int cpu; 17 | }; 18 | 19 | template 20 | std::vector GenerateWork(Arrival a, Service s, double cur_us, 21 | double last_us, int cpu) { 22 | std::vector w; 23 | while (cur_us < last_us) { 24 | cur_us += a(); 25 | w.emplace_back(work_unit{cur_us, s(), 0, cpu}); 26 | } 27 | return w; 28 | } 29 | 30 | template 31 | std::vector GenerateWork(Arrival a, Service *s, double cur_us, 32 | double last_us, int cpu) { 33 | std::vector w; 34 | while (cur_us < last_us) { 35 | cur_us += a(); 36 | w.emplace_back(work_unit{cur_us, (*s)(), 0, cpu}); 37 | } 38 | return w; 39 | } 40 | 41 | std::vector split(const std::string &text, char sep); 42 | 43 | class Timer { 44 | public: 45 | using micro = duration; 46 | 47 | Timer() { 48 | barrier(); 49 | start_ts_ = steady_clock::now(); 50 | barrier(); 51 | } 52 | ~Timer(){}; 53 | 54 | // Reset the timer start time. 55 | void Reset() { 56 | barrier(); 57 | start_ts_ = steady_clock::now(); 58 | barrier(); 59 | } 60 | 61 | // Returns the microseconds elapsed since the timer was constructed. 62 | double Elapsed() { 63 | barrier(); 64 | auto now = steady_clock::now(); 65 | barrier(); 66 | return duration_cast(now - start_ts_).count(); 67 | } 68 | 69 | // Busy spin until the deadline (in microseconds) passes. 70 | void SpinUntil(double deadline) { 71 | while (Elapsed() < deadline) cpu_relax(); 72 | } 73 | 74 | // Sleep until the deadline (in microseconds) passes. 
75 | void SleepUntil(double deadline) { 76 | double diff = deadline - Elapsed(); 77 | if (diff <= 0) return; 78 | rt::Sleep(static_cast(diff)); 79 | } 80 | 81 | private: 82 | time_point start_ts_; 83 | }; 84 | -------------------------------------------------------------------------------- /apps/spdk_perf/.gitignore: -------------------------------------------------------------------------------- 1 | perf 2 | -------------------------------------------------------------------------------- /apps/spdk_perf/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # BSD LICENSE 3 | # 4 | # Copyright (c) Intel Corporation. 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in 15 | # the documentation and/or other materials provided with the 16 | # distribution. 17 | # * Neither the name of Intel Corporation nor the names of its 18 | # contributors may be used to endorse or promote products derived 19 | # from this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | # 33 | 34 | SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../spdk) 35 | include $(SPDK_ROOT_DIR)/mk/spdk.common.mk 36 | 37 | APP = perf 38 | 39 | include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk 40 | 41 | CFLAGS += -I$(SPDK_ROOT_DIR)/dpdk/build/include 42 | LIBS += -lm 43 | -------------------------------------------------------------------------------- /apps/spdk_perf/README.md: -------------------------------------------------------------------------------- 1 | Copied from https://github.com/anakli/spdk_perf and modified to use newer SPDK version. 2 | 3 | To build, run make. 
4 | -------------------------------------------------------------------------------- /apps/spdk_perf/run_perf.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/bin/bash 3 | 4 | # Script to run perf tests 5 | # June 2016 6 | 7 | # Parameters: 8 | # $1 = output_filename 9 | 10 | # Require exactly one argument; without it the redirections below have no target. 11 | if [ $# -ne 1 ] 12 | then 13 | echo "Usage: ./run_perf.sh [output_filename]"; exit 1 14 | fi 15 | 16 | # Create output file and set permissions for root to write 17 | touch $1 18 | chmod o+w $1 19 | 20 | printf "Workload; Read Ratio; Num cores; Max Qdepth; Req Size; Target IOPS; Rd IOPS; Wr IOPS; Rd Avg; Rd p95; Rd p99; Rd p99.9; Rd p99.99; Wr Avg; Wr p95; Wr p99; Wr p99.9; Wr p99.99; Total p99.9; #dropped \n" >> $1 21 | 22 | 23 | # sweep request sizes 24 | for s in 4096 # 1024 8192 16384 32768 65536 25 | do 26 | # sweep read/write ratios 27 | for m in 0 50 100 #100 75 50 25 0 # 99 95 90 85 80 75 70 60 50 40 30 20 10 0 28 | do 29 | # sweep target IOPS 30 | # note lambda is the target IOPS *per core* 31 | # total target IOPS is lambda times the num cores, specified via coremask parameter 32 | for lambda in `seq 20000 20000 700000` 33 | do 34 | printf "randrw-openloop-exp; %d; 1; 2122000; %d; %d;" "$m" "$s" "$lambda" >> $1 35 | #sudo ./perf -t 120 -s 4096 -q 1024 -w randrw -M $m -c 1 -o $1 -L $lambda 36 | # note: -c is the coremask in hex 37 | # -c 1 means use a single core 38 | # -c 3 means use 2 cores 39 | # -c f means use 4 cores 40 | # keep in mind, to achieve high IOPS, may need more than 1 core 41 | sudo timeout 30 ./perf -t 10 -s $s -q 2122000 -w randrw -M $m -c 0x1 -o $1 -L $lambda 42 | done 43 | printf "\n" >> $1 44 | done 45 | done 46 | -------------------------------------------------------------------------------- /apps/storage_service/.gitignore: -------------------------------------------------------------------------------- 1 | storage_server 2 | -------------------------------------------------------------------------------- 
/apps/storage_service/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for storage_server 2 | ROOT_PATH=../.. 3 | include $(ROOT_PATH)/build/shared.mk 4 | 5 | librt_libs = $(ROOT_PATH)/bindings/cc/librt++.a 6 | INC += -I$(ROOT_PATH)/bindings/cc 7 | 8 | storage_server_src = storage_server.cc 9 | storage_server_obj = $(storage_server_src:.cc=.o) 10 | 11 | 12 | INC += -Isnappy/ -Isnappy/build/ 13 | LIBS += snappy/build/libsnappy.a -lcrypto 14 | 15 | src = $(storage_server_src) 16 | obj = $(src:.cc=.o) 17 | dep = $(obj:.o=.d) 18 | 19 | # must be first 20 | all: storage_server 21 | 22 | storage_server: $(storage_server_obj) $(librt_libs) $(RUNTIME_DEPS) 23 | $(LDXX) -o $@ $(LDFLAGS) ../../shim/libshim.a -ldl $(obj) $(librt_libs) $(RUNTIME_LIBS) $(LIBS) 24 | 25 | ifneq ($(MAKECMDGOALS),clean) 26 | -include $(dep) # include all dep files in the makefile 27 | endif 28 | 29 | # rule to generate a dep file by using the C preprocessor 30 | # (see man cpp for details on the -MM and -MT options) 31 | %.d: %.cc 32 | @$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@ 33 | %.o: %.cc 34 | $(CXX) $(CXXFLAGS) -c $< -o $@ 35 | 36 | .PHONY: clean 37 | clean: 38 | rm -f $(obj) $(dep) storage_server 39 | -------------------------------------------------------------------------------- /apps/storage_service/reflex.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2017, Stanford University 3 | * 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * * Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 
11 | * 12 | * * Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * * Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 24 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | * POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | 33 | enum msg_type { 34 | PUT, 35 | GET, 36 | PUT_ACK, 37 | GET_RESP, 38 | }; 39 | 40 | struct msg_header { 41 | void* addr; 42 | int cmd; 43 | size_t len; 44 | int tag; 45 | }; 46 | 47 | /* 48 | * ReFlex protocol support 49 | */ 50 | 51 | #define CMD_GET 0x00 52 | #define CMD_SET 0x01 53 | #define CMD_SET_NO_ACK 0x02 54 | 55 | #define RESP_OK 0x00 56 | #define RESP_EINVAL 0x04 57 | 58 | #define REQ_PKT 0x80 59 | #define RESP_PKT 0x81 60 | #define MAX_EXTRA_LEN 8 61 | #define MAX_KEY_LEN 8 62 | 63 | typedef struct __attribute__((__packed__)) { 64 | uint16_t magic; 65 | uint16_t opcode; 66 | void* req_handle; 67 | unsigned long lba; 68 | unsigned int lba_count; 69 | uint64_t tsc; 70 | } binary_header_blk_t; 71 | -------------------------------------------------------------------------------- /apps/storage_service/snappy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | git submodule update --init --recursive 6 | 7 | pushd snappy 8 | rm -rf build 9 | mkdir build 10 | pushd build 11 | 12 | cmake -DSNAPPY_BUILD_TESTS=0 -DCMAKE_BUILD_TYPE=Release .. 13 | make -j 14 | 15 | popd 16 | popd 17 | -------------------------------------------------------------------------------- /apps/stream/.gitignore: -------------------------------------------------------------------------------- 1 | stream 2 | stream_linux 3 | stream_query 4 | -------------------------------------------------------------------------------- /apps/stream/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for stream 2 | ROOT_PATH=../.. 
3 | include $(ROOT_PATH)/build/shared.mk 4 | 5 | stream_src = stream.cc 6 | stream_obj = $(stream_src:.cc=.o) 7 | 8 | stream_query_src = stream_query.cc 9 | stream_query_obj = $(stream_query_src:.cc=.o) 10 | 11 | lib_shim = $(ROOT_PATH)/shim/libshim.a -ldl 12 | 13 | librt_libs = $(ROOT_PATH)/bindings/cc/librt++.a 14 | INC += -I$(ROOT_PATH)/bindings/cc 15 | 16 | RUNTIME_LIBS := $(RUNTIME_LIBS) 17 | 18 | # must be first 19 | all: stream stream_query stream_linux 20 | 21 | stream: $(stream_obj) $(librt_libs) $(RUNTIME_DEPS) 22 | $(LDXX) -o $@ $(LDFLAGS) $(stream_obj) \ 23 | -Wl,--wrap=main $(lib_shim) $(librt_libs) $(RUNTIME_LIBS) 24 | 25 | stream_linux: $(stream_obj) 26 | $(LDXX) -o $@ $(LDFLAGS) $(stream_obj) -lpthread 27 | 28 | stream_query: $(stream_query_obj) $(librt_libs) $(RUNTIME_DEPS) 29 | $(LDXX) -o $@ $(LDFLAGS) $(stream_query_obj) \ 30 | $(librt_libs) $(RUNTIME_LIBS) 31 | 32 | # general build rules for all targets 33 | src = $(stream_src) $(stream_query_src) 34 | obj = $(src:.cc=.o) 35 | dep = $(obj:.o=.d) 36 | 37 | ifneq ($(MAKECMDGOALS),clean) 38 | -include $(dep) # include all dep files in the makefile 39 | endif 40 | 41 | # rule to generate a dep file by using the C preprocessor 42 | # (see man cpp for details on the -MM and -MT options) 43 | %.d: %.cc 44 | @$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@ 45 | %.o: %.cc 46 | $(CXX) $(CXXFLAGS) -c $< -o $@ 47 | 48 | .PHONY: clean 49 | clean: 50 | rm -f $(obj) $(dep) stream stream_query stream_linux 51 | -------------------------------------------------------------------------------- /apps/stream/stream_query.cc: -------------------------------------------------------------------------------- 1 | extern "C" { 2 | #include 3 | } 4 | 5 | #include "runtime.h" 6 | #include "sync.h" 7 | #include "thread.h" 8 | #include "timer.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | barrier_t barrier; 16 | 17 | #define SHM_KEY (0x123) 18 | #define CACHELINE 64 19 | 20 | int main(int argc, char 
*argv[]) { 21 | if (argc != 2) { 22 | std::cerr << "usage: [#threads]" << std::endl; 23 | return -EINVAL; 24 | } 25 | 26 | int threads = std::stoi(argv[1], nullptr, 0); 27 | 28 | int ret = base_init(); 29 | if (ret) { 30 | fprintf(stderr, "failed to init base, ret = %d\n", ret); 31 | return ret; 32 | } 33 | /* attach the shared segment holding one counter per cache line; bail out on failure rather than dereferencing (void *)-1 in the loop below */ 34 | volatile double *cnt; 35 | int shmid = shmget((key_t)SHM_KEY, CACHELINE * threads, 0666 | IPC_CREAT); if (shmid < 0) { perror("shmget"); return -1; } 36 | void *shm = NULL; 37 | shm = shmat(shmid, 0, 0); if (shm == (void *)-1) { perror("shmat"); return -1; } 38 | cnt = (volatile double *)shm; 39 | 40 | uint64_t last_total = 0; 41 | auto last = std::chrono::steady_clock::now(); 42 | while (1) { 43 | sleep(1); 44 | auto now = std::chrono::steady_clock::now(); 45 | uint64_t total = 0; 46 | double duration = 47 | std::chrono::duration_cast>(now - last) 48 | .count(); 49 | for (int i = 0; i < threads; i++) { 50 | total += cnt[i * CACHELINE / sizeof(double)]; 51 | } 52 | log_info("mops: %lf, timestamp: %lu", 53 | static_cast(total - last_total) / 1E6 / duration, 54 | (unsigned long)time(NULL)); 55 | last_total = total; 56 | last = now; 57 | } 58 | 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /apps/streamcluster/COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2007 Princeton University 2 | Authors: Jia Deng 3 | Christian Bienia 4 | -------------------------------------------------------------------------------- /apps/streamcluster/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for streamcluster 2 | ROOT_PATH=../.. 
3 | include $(ROOT_PATH)/build/shared.mk 4 | 5 | streamcluster_src = streamcluster.cc 6 | streamcluster_obj = $(streamcluster_src:.cc=.o) 7 | 8 | lib_shim = $(ROOT_PATH)/shim/libshim.a -ldl 9 | 10 | librt_libs = $(ROOT_PATH)/bindings/cc/librt++.a 11 | INC += -I$(ROOT_PATH)/bindings/cc 12 | 13 | CXXFLAGS += -DENABLE_THREADS -march=native 14 | LDFLAGS += -Wl,--wrap=main -no-pie 15 | 16 | RUNTIME_LIBS := $(RUNTIME_LIBS) 17 | 18 | # must be first 19 | all: streamcluster 20 | 21 | streamcluster: $(streamcluster_obj) $(librt_libs) $(RUNTIME_DEPS) 22 | $(LDXX) -o $@ $(LDFLAGS) $(streamcluster_obj) \ 23 | $(lib_shim) $(librt_libs) $(RUNTIME_LIBS) 24 | 25 | # general build rules for all targets 26 | src = $(streamcluster_src) 27 | obj = $(src:.cc=.o) 28 | dep = $(obj:.o=.d) 29 | 30 | ifneq ($(MAKECMDGOALS),clean) 31 | -include $(dep) # include all dep files in the makefile 32 | endif 33 | 34 | # rule to generate a dep file by using the C preprocessor 35 | # (see man cpp for details on the -MM and -MT options) 36 | %.d: %.cc 37 | @$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@ 38 | %.o: %.cc 39 | $(CXX) $(CXXFLAGS) -c $< -o $@ 40 | 41 | .PHONY: clean 42 | clean: 43 | rm -f $(obj) $(dep) streamcluster 44 | -------------------------------------------------------------------------------- /apps/synthetic/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /apps/synthetic/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "synthetic" 3 | version = "0.1.1" 4 | authors = ["The Shenango Developers"] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | arrayvec = "0.7.2" 9 | byteorder = "1.5" 10 | clap = "2.32.0" 11 | dns-parser = "0.7.1" 12 | hostname = "0.1.4" 13 | itertools = "0.8.0" 14 | libc = "0.2" 15 | rand = "0.7.3" 16 | rand_distr = "0.3.0" 17 | rand_mt = "3" 18 | 
serde = "1.0" 19 | net2 = "0.2" 20 | mersenne_twister = "1.1.1" 21 | shenango = { path = "../../bindings/rust" } 22 | 23 | [profile.release] 24 | debug=false 25 | panic = "abort" 26 | 27 | [workspace] 28 | -------------------------------------------------------------------------------- /apps/synthetic/build.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | fn main() { 4 | let manifest_path: PathBuf = std::env::var("CARGO_MANIFEST_DIR") 5 | .unwrap() 6 | .parse() 7 | .unwrap(); 8 | // manifest_path is now .../caladan/apps/synthetic 9 | let link_script_path = manifest_path 10 | .parent() 11 | .unwrap() 12 | .parent() 13 | .unwrap() 14 | .join("base/base.ld"); 15 | println!( 16 | "cargo:rustc-link-arg=-T{}", 17 | link_script_path.to_str().unwrap() 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /apps/synthetic/rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "nightly" 3 | targets = [ "x86_64-unknown-linux-gnu" ] 4 | -------------------------------------------------------------------------------- /apps/synthetic/src/distribution.rs: -------------------------------------------------------------------------------- 1 | use rand::Rng; 2 | use rand_distr::{Distribution as DistR, Exp}; 3 | 4 | #[derive(Copy, Clone, Debug)] 5 | pub enum Distribution { 6 | Zero, 7 | Constant(u64), 8 | Exponential(f64), 9 | Bimodal(f64, u64, u64), 10 | } 11 | 12 | impl Distribution { 13 | pub fn name(&self) -> &'static str { 14 | match *self { 15 | Distribution::Zero => "zero", 16 | Distribution::Constant(_) => "constant", 17 | Distribution::Exponential(_) => "exponential", 18 | Distribution::Bimodal(_, _, _) => "bimodal", 19 | } 20 | } 21 | pub fn sample(&self, rng: &mut R) -> u64 { 22 | match *self { 23 | Distribution::Zero => 0, 24 | Distribution::Constant(m) => m, 25 | 
Distribution::Exponential(m) => Exp::new(1.0 / m).unwrap().sample(rng) as u64, 26 | Distribution::Bimodal(p, v1, v2) => { 27 | if rng.gen_bool(p) { 28 | v1 29 | } else { 30 | v2 31 | } 32 | } 33 | } 34 | } 35 | 36 | pub fn create(spec: &str) -> Result { 37 | let tokens: Vec<&str> = spec.split(":").collect(); 38 | assert!(tokens.len() > 0); 39 | match tokens[0] { 40 | "zero" => Ok(Distribution::Zero), 41 | "constant" => { 42 | assert!(tokens.len() == 2); 43 | let val: u64 = tokens[1].parse().unwrap(); 44 | Ok(Distribution::Constant(val)) 45 | } 46 | "exponential" => { 47 | assert!(tokens.len() == 2); 48 | let val: f64 = tokens[1].parse().unwrap(); 49 | Ok(Distribution::Exponential(val)) 50 | } 51 | "bimodal" => { 52 | assert!(tokens.len() == 4); 53 | let prob: f64 = tokens[1].parse().unwrap(); 54 | let val1: u64 = tokens[2].parse().unwrap(); 55 | let val2: u64 = tokens[3].parse().unwrap(); 56 | Ok(Distribution::Bimodal(prob, val1, val2)) 57 | } 58 | _ => Err("bad distribution spec"), 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /apps/synthetic/src/lockstep.rs: -------------------------------------------------------------------------------- 1 | extern crate hostname; 2 | 3 | use std::io::{self, Read, Write}; 4 | use std::net::SocketAddrV4; 5 | use std::time::Duration; 6 | 7 | use crate::Backend; 8 | use crate::Connection; 9 | 10 | pub enum Group { 11 | Server(Vec), 12 | Client(Connection), 13 | } 14 | 15 | impl Group { 16 | pub fn new_server( 17 | num_clients: usize, 18 | addr: SocketAddrV4, 19 | backend: Backend, 20 | ) -> io::Result { 21 | let listener = backend.create_tcp_listener(addr)?; 22 | let mut clients = Vec::new(); 23 | for _ in 0..num_clients { 24 | clients.push(listener.accept()?); 25 | } 26 | 27 | Ok(Group::Server(clients)) 28 | } 29 | 30 | pub fn new_client(addr: SocketAddrV4, backend: Backend) -> io::Result { 31 | for _ in 0..5 { 32 | match backend.create_tcp_connection(None, addr) { 33 | 
Ok(stream) => return Ok(Group::Client(stream)), 34 | Err(_) => backend.sleep(Duration::from_millis(50)), 35 | } 36 | } 37 | Ok(Group::Client(backend.create_tcp_connection(None, addr)?)) 38 | } 39 | 40 | pub fn barrier(&mut self) { 41 | let mut buf = [0; 1]; 42 | 43 | match *self { 44 | Group::Server(ref mut clients) => { 45 | for c in clients.iter_mut() { 46 | c.read_exact(&mut buf).unwrap(); 47 | } 48 | buf[0] = 0; 49 | for c in clients { 50 | c.write_all(&buf).unwrap(); 51 | } 52 | } 53 | Group::Client(ref mut stream) => { 54 | stream.write_all(&buf).unwrap(); 55 | stream.read_exact(&mut buf).unwrap(); 56 | } 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /apps/synthetic/src/payload.rs: -------------------------------------------------------------------------------- 1 | use crate::Buffer; 2 | use crate::Connection; 3 | use crate::LoadgenProtocol; 4 | use crate::Packet; 5 | use crate::Transport; 6 | 7 | use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; 8 | use std::io; 9 | use std::io::Read; 10 | 11 | pub struct Payload { 12 | pub work_iterations: u64, 13 | pub index: u64, 14 | pub randomness: u64, 15 | } 16 | 17 | pub const PAYLOAD_SIZE: usize = 24; 18 | 19 | #[derive(Clone, Copy)] 20 | pub struct SyntheticProtocol {} 21 | 22 | impl LoadgenProtocol for SyntheticProtocol { 23 | fn uses_ordered_requests(&self) -> bool { 24 | false 25 | } 26 | 27 | fn gen_req(&self, i: usize, p: &Packet, buf: &mut Vec) { 28 | Payload { 29 | work_iterations: p.work_iterations, 30 | index: i as u64, 31 | randomness: p.randomness, 32 | } 33 | .serialize_into(buf) 34 | .unwrap(); 35 | } 36 | 37 | fn read_response(&self, mut sock: &Connection, buf: &mut Buffer) -> io::Result<(usize, u64)> { 38 | let scratch = buf.get_empty_buf(); 39 | sock.read_exact(&mut scratch[..PAYLOAD_SIZE])?; 40 | let payload = Payload::deserialize(&mut &scratch[..])?; 41 | Ok((payload.index as usize, payload.randomness)) 42 | } 43 | } 44 | 45 | 
impl SyntheticProtocol { 46 | pub fn with_args(_matches: &clap::ArgMatches, _tport: Transport) -> Self { 47 | SyntheticProtocol {} 48 | } 49 | 50 | pub fn args<'a, 'b>() -> Vec> { 51 | vec![] 52 | } 53 | } 54 | 55 | impl Payload { 56 | pub fn serialize_into(&self, writer: &mut W) -> io::Result<()> { 57 | writer.write_u64::(self.work_iterations)?; 58 | writer.write_u64::(self.index)?; 59 | writer.write_u64::(self.randomness)?; 60 | Ok(()) 61 | } 62 | 63 | pub fn deserialize(reader: &mut R) -> io::Result { 64 | let p = Payload { 65 | work_iterations: reader.read_u64::()?, 66 | index: reader.read_u64::()?, 67 | randomness: reader.read_u64::()?, 68 | }; 69 | return Ok(p); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /base/base.ld: -------------------------------------------------------------------------------- 1 | SECTIONS 2 | { 3 | .perthread (NOLOAD) : { 4 | PROVIDE(__perthread_start = .); 5 | *(.perthread) 6 | PROVIDE(__perthread_end = .); 7 | } 8 | } 9 | INSERT AFTER .text 10 | -------------------------------------------------------------------------------- /base/fd_transfer.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | int recv_fd(int fd, int *fd_out) 9 | { 10 | struct msghdr msg; 11 | char buf[CMSG_SPACE(sizeof(int))]; 12 | struct iovec iov[1]; 13 | char iobuf[1]; 14 | ssize_t ret; 15 | struct cmsghdr *cmptr; 16 | 17 | /* init message header and buffs for control message and iovec */ 18 | msg.msg_control = buf; 19 | msg.msg_controllen = sizeof(buf); 20 | msg.msg_name = NULL; 21 | msg.msg_namelen = 0; 22 | 23 | iov[0].iov_base = iobuf; 24 | iov[0].iov_len = sizeof(iobuf); 25 | msg.msg_iov = iov; 26 | msg.msg_iovlen = 1; 27 | 28 | ret = recvmsg(fd, &msg, 0); 29 | if (ret < 0) { 30 | log_debug("control: error with recvmsg %ld", ret); 31 | return ret; 32 | } 33 | 34 | /* check validity of control message */ 
35 | cmptr = CMSG_FIRSTHDR(&msg); 36 | if (cmptr == NULL) { 37 | log_debug("control: no cmsg %p", cmptr); 38 | return -1; 39 | } else if (cmptr->cmsg_len != CMSG_LEN(sizeof(int))) { 40 | log_debug("control: cmsg is too long %ld", cmptr->cmsg_len); 41 | return -1; 42 | } else if (cmptr->cmsg_level != SOL_SOCKET) { 43 | log_debug("control: unrecognized cmsg level %d", cmptr->cmsg_level); 44 | return -1; 45 | } else if (cmptr->cmsg_type != SCM_RIGHTS) { 46 | log_debug("control: unrecognized cmsg type %d", cmptr->cmsg_type); 47 | return -1; 48 | } 49 | 50 | *fd_out = *(int *)CMSG_DATA(cmptr); 51 | return 0; 52 | } 53 | /* send_fd - passes @shared_fd over socket @controlfd as an SCM_RIGHTS control message; returns 0 on success, -1 on failure */ 54 | int send_fd(int controlfd, int shared_fd) 55 | { 56 | struct msghdr msg = {0}; 57 | char buf[CMSG_SPACE(sizeof(int))]; 58 | struct iovec iov[1]; 59 | char iobuf[1] = {0}; /* dummy payload byte, zeroed so no stack contents reach the peer */ 60 | struct cmsghdr *cmptr; 61 | 62 | /* init message header, iovec is necessary even though it's unused */ 63 | msg.msg_name = NULL; 64 | msg.msg_namelen = 0; 65 | msg.msg_control = buf; 66 | msg.msg_controllen = sizeof(buf); 67 | iov[0].iov_base = iobuf; 68 | iov[0].iov_len = sizeof(iobuf); 69 | msg.msg_iov = iov; 70 | msg.msg_iovlen = 1; 71 | 72 | /* init control message */ 73 | cmptr = CMSG_FIRSTHDR(&msg); 74 | cmptr->cmsg_len = CMSG_LEN(sizeof(int)); 75 | cmptr->cmsg_level = SOL_SOCKET; 76 | cmptr->cmsg_type = SCM_RIGHTS; 77 | *(int *)CMSG_DATA(cmptr) = shared_fd; 78 | 79 | if (sendmsg(controlfd, &msg, 0) != sizeof(iobuf)) { 80 | log_err("failed to send cmsg"); 81 | return -1; 82 | } 83 | 84 | return 0; 85 | } -------------------------------------------------------------------------------- /base/init.c: -------------------------------------------------------------------------------- 1 | /* 2 | * init.c - support for initialization 3 | */ 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "init_internal.h" 12 | 13 | bool base_init_done __aligned(CACHE_LINE_SIZE); 14 | 15 | void __weak init_shutdown(int status) 16 | { 17 | log_info("init: shutting down -> %s", 18 
| status == EXIT_SUCCESS ? "SUCCESS" : "FAILURE"); 19 | exit(status); 20 | } 21 | 22 | /* we initialize these early subsystems by hand */ 23 | static int init_internal(void) 24 | { 25 | int ret; 26 | 27 | ret = cpu_init(); 28 | if (ret) 29 | return ret; 30 | 31 | ret = time_init(); 32 | if (ret) 33 | return ret; 34 | 35 | ret = page_init(); 36 | if (ret) { 37 | log_err("Could not intialize memory. Please ensure that hugepages are " 38 | "enabled/available."); 39 | return ret; 40 | } 41 | 42 | return slab_init(); 43 | } 44 | 45 | 46 | extern int thread_init_perthread(void); 47 | 48 | /** 49 | * base_init - initializes the base library 50 | * 51 | * Call this function before using the library. 52 | * Returns 0 if successful, otherwise fail. 53 | */ 54 | int base_init(void) 55 | { 56 | int ret; 57 | 58 | ret = thread_init_perthread(); 59 | if (ret) 60 | return ret; 61 | 62 | ret = init_internal(); 63 | if (ret) 64 | return ret; 65 | 66 | base_init_done = true; 67 | return 0; 68 | } 69 | 70 | static int init_thread_internal(void) 71 | { 72 | return page_init_thread(); 73 | } 74 | 75 | /** 76 | * base_init_thread - prepares a thread for use by the base library 77 | * 78 | * Returns 0 if successful, otherwise fail. 
79 | */ 80 | int base_init_thread(void) 81 | { 82 | int ret; 83 | 84 | ret = thread_init_perthread(); 85 | if (ret) 86 | return ret; 87 | 88 | ret = init_thread_internal(); 89 | if (ret) 90 | return ret; 91 | 92 | perthread_store(thread_init_done, true); 93 | return 0; 94 | } 95 | 96 | -------------------------------------------------------------------------------- /base/init_internal.h: -------------------------------------------------------------------------------- 1 | /* 2 | * init_internal.h - internal base library initialization routines 3 | */ 4 | 5 | #pragma once 6 | 7 | /* internal base library initializers */ 8 | extern int cpu_init(void); 9 | extern int time_init(void); 10 | extern int page_init(void); 11 | extern int slab_init(void); 12 | extern int smalloc_init(void); 13 | 14 | /* internal base library per-thread initializers */ 15 | extern int page_init_thread(void); 16 | extern int smalloc_init_thread(void); 17 | -------------------------------------------------------------------------------- /base/list.c: -------------------------------------------------------------------------------- 1 | /* Licensed under BSD-MIT - see LICENSE file for details */ 2 | #include 3 | #include 4 | #include 5 | 6 | static void *corrupt(const char *abortstr, 7 | const struct list_node *head, 8 | const struct list_node *node, 9 | unsigned int count) 10 | { 11 | if (abortstr) { 12 | panic("%s: prev corrupt in node %p (%u) of %p\n", 13 | abortstr, node, count, head); 14 | } 15 | return NULL; 16 | } 17 | 18 | struct list_node *list_check_node(const struct list_node *node, 19 | const char *abortstr) 20 | { 21 | const struct list_node *p, *n; 22 | int count = 0; 23 | 24 | for (p = node, n = node->next; n != node; p = n, n = n->next) { 25 | count++; 26 | if (n->prev != p) 27 | return corrupt(abortstr, node, n, count); 28 | } 29 | /* Check prev on head node. 
*/ 30 | if (node->prev != p) 31 | return corrupt(abortstr, node, node, 0); 32 | 33 | return (struct list_node *)node; 34 | } 35 | 36 | struct list_head *list_check(const struct list_head *h, const char *abortstr) 37 | { 38 | if (!list_check_node(&h->n, abortstr)) 39 | return NULL; 40 | return (struct list_head *)h; 41 | } 42 | -------------------------------------------------------------------------------- /base/log.c: -------------------------------------------------------------------------------- 1 | /* 2 | * log.c - the logging system 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #define MAX_LOG_LEN 4096 17 | 18 | /* log levels greater than this value won't be printed */ 19 | int max_loglevel = LOG_DEBUG; 20 | 21 | void logk(int level, const char *fmt, ...) 22 | { 23 | char buf[MAX_LOG_LEN]; 24 | va_list ptr; 25 | off_t off; 26 | int cpu; 27 | 28 | if (level > max_loglevel) 29 | return; 30 | 31 | cpu = sched_getcpu(); 32 | 33 | if (likely(base_init_done)) { 34 | uint64_t us = microtime(); 35 | sprintf(buf, "[%3d.%06d] CPU %02d| <%d> ", 36 | (int)(us / ONE_SECOND), (int)(us % ONE_SECOND), 37 | cpu, level); 38 | } else { 39 | sprintf(buf, "CPU %02d| <%d> ", cpu, level); 40 | } 41 | 42 | off = strlen(buf); 43 | va_start(ptr, fmt); 44 | vsnprintf(buf + off, MAX_LOG_LEN - off, fmt, ptr); 45 | va_end(ptr); 46 | puts(buf); 47 | 48 | if (level <= LOG_ERR) 49 | fflush(stdout); 50 | } 51 | 52 | #define MAX_CALL_DEPTH 256 53 | void logk_backtrace(void) 54 | { 55 | void *buf[MAX_CALL_DEPTH]; 56 | const int calls = backtrace(buf, ARRAY_SIZE(buf)); 57 | backtrace_symbols_fd(buf, calls, 1); 58 | } 59 | 60 | void logk_bug(bool fatal, const char *expr, 61 | const char *file, int line, const char *func) 62 | { 63 | logk(LOG_EMERG, "%s: %s:%d ASSERTION '%s' FAILED IN '%s'", 64 | fatal ? 
"FATAL" : "WARN", file, line, expr, func); 65 | logk_backtrace(); 66 | 67 | if (fatal) 68 | init_shutdown(EXIT_FAILURE); 69 | } 70 | -------------------------------------------------------------------------------- /base/lrpc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * lrpc.c - shared memory communication channels 3 | */ 4 | 5 | #include 6 | 7 | #include 8 | 9 | /* internal use only */ 10 | bool __lrpc_send(struct lrpc_chan_out *chan, uint64_t cmd, 11 | unsigned long payload) 12 | { 13 | struct lrpc_msg *dst; 14 | 15 | assert(chan->send_head - chan->send_tail == chan->size); 16 | 17 | chan->send_tail = load_acquire(chan->recv_head_wb); 18 | if (chan->send_head - chan->send_tail == chan->size) 19 | return false; 20 | 21 | dst = &chan->tbl[chan->send_head & (chan->size - 1)]; 22 | dst->payload = payload; 23 | 24 | cmd |= (chan->send_head++ & chan->size) ? 0 : LRPC_DONE_PARITY; 25 | store_release(&dst->cmd, cmd); 26 | return true; 27 | } 28 | 29 | /** 30 | * lrpc_init_out - initializes an egress shared memory channel 31 | * @chan: the channel struct to initialize 32 | * @tbl: a buffer to store channel messages 33 | * @size: the number of message elements in the buffer 34 | * @recv_head_wb: a pointer to the head position of the receiver 35 | * 36 | * returns 0 if successful, or -EINVAL if @size is not a power of two. 
37 | */ 38 | int lrpc_init_out(struct lrpc_chan_out *chan, struct lrpc_msg *tbl, 39 | unsigned int size, uint32_t *recv_head_wb) 40 | { 41 | if (!is_power_of_two(size)) 42 | return -EINVAL; 43 | 44 | memset(chan, 0, sizeof(*chan)); 45 | chan->tbl = tbl; 46 | chan->size = size; 47 | chan->recv_head_wb = recv_head_wb; 48 | return 0; 49 | } 50 | 51 | /** 52 | * lrpc_init_in - initializes an ingress shared memory channel 53 | * @chan: the channel struct to initialize 54 | * @tbl: a buffer to store channel messages 55 | * @size: the number of message elements in the buffer 56 | * @recv_head_wb: a pointer to the head position of the receiver 57 | * 58 | * returns 0 if successful, or -EINVAL if @size is not a power of two. 59 | */ 60 | int lrpc_init_in(struct lrpc_chan_in *chan, struct lrpc_msg *tbl, 61 | unsigned int size, uint32_t *recv_head_wb) 62 | { 63 | if (!is_power_of_two(size)) 64 | return -EINVAL; 65 | 66 | memset(chan, 0, sizeof(*chan)); 67 | chan->tbl = tbl; 68 | chan->size = size; 69 | chan->recv_head_wb = recv_head_wb; 70 | return 0; 71 | } 72 | -------------------------------------------------------------------------------- /base/signal.c: -------------------------------------------------------------------------------- 1 | /* 2 | * signal.c - support for setting up signal handlers without using glibc 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #define SA_RESTORER 0x04000000 13 | 14 | /* copied from glibc sysdeps/unix/sysv/linux/kernel_sigaction.h */ 15 | struct kernel_sigaction { 16 | __sighandler_t k_sa_handler; 17 | unsigned long sa_flags; 18 | void (*sa_restorer) (void); 19 | sigset_t sa_mask; 20 | }; 21 | 22 | /* allow user to specify sa_restorer */ 23 | int base_sigaction_full(int sig, const struct sigaction *act, 24 | struct sigaction *oact) 25 | { 26 | long ret; 27 | struct kernel_sigaction kact, okact; 28 | 29 | if (act) { 30 | kact.k_sa_handler = act->sa_handler; 31 | memcpy(&kact.sa_mask, 
&act->sa_mask, sizeof(sigset_t)); 32 | kact.sa_flags = act->sa_flags | SA_RESTORER; 33 | kact.sa_restorer = act->sa_restorer; 34 | } 35 | 36 | ret = syscall(__NR_rt_sigaction, sig, act ? &kact : NULL, oact ? &okact : NULL, 8); 37 | 38 | if (ret < 0) { 39 | errno = -ret; 40 | return -1; 41 | } 42 | 43 | if (oact) { 44 | oact->sa_handler = okact.k_sa_handler; 45 | memcpy(&oact->sa_mask, &okact.sa_mask, sizeof(sigset_t)); 46 | oact->sa_flags = okact.sa_flags; 47 | oact->sa_restorer = okact.sa_restorer; 48 | } 49 | 50 | return 0; 51 | } 52 | 53 | 54 | /* use our own sa_restorer instead of glibc's */ 55 | int base_sigaction(int sig, const struct sigaction *act, struct sigaction *oact) 56 | { 57 | long ret; 58 | struct kernel_sigaction kact, okact; 59 | 60 | if (act) { 61 | kact.k_sa_handler = act->sa_handler; 62 | memcpy(&kact.sa_mask, &act->sa_mask, sizeof(sigset_t)); 63 | kact.sa_flags = act->sa_flags | SA_RESTORER; 64 | kact.sa_restorer = &syscall_rt_sigreturn; 65 | } 66 | 67 | ret = syscall(__NR_rt_sigaction, sig, act ? &kact : NULL, oact ? 
&okact : NULL, 8); 68 | 69 | if (ret < 0) { 70 | errno = -ret; 71 | return -1; 72 | } 73 | 74 | if (oact) { 75 | oact->sa_handler = okact.k_sa_handler; 76 | memcpy(&oact->sa_mask, &okact.sa_mask, sizeof(sigset_t)); 77 | oact->sa_flags = okact.sa_flags; 78 | oact->sa_restorer = okact.sa_restorer; 79 | } 80 | 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /base/syscall.S: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | .section .note.GNU-stack,"",@progbits 5 | .text 6 | .globl base_syscall_start 7 | base_syscall_start: 8 | 9 | .align 16 10 | .globl syscall_mmap 11 | syscall_mmap: 12 | movl $__NR_mmap, %eax 13 | movq %rcx, %r10 14 | syscall 15 | ret 16 | 17 | .align 16 18 | .globl syscall_mbind 19 | syscall_mbind: 20 | movl $__NR_mbind, %eax 21 | movq %rcx, %r10 22 | syscall 23 | ret 24 | 25 | .align 16 26 | .globl syscall_ioctl 27 | syscall_ioctl: 28 | movl $__NR_ioctl, %eax 29 | syscall 30 | ret 31 | 32 | .align 16 33 | .globl syscall_rt_sigreturn 34 | syscall_rt_sigreturn: 35 | movl $__NR_rt_sigreturn, %eax 36 | // Invalidate the alternate signal stack entry in this frame 37 | // so the kernel doesn't change it. 
38 | movl $4, 24(%rsp) 39 | syscall 40 | ret 41 | 42 | .align 16 43 | .globl syscall_madvise 44 | syscall_madvise: 45 | movl $__NR_madvise, %eax 46 | syscall 47 | ret 48 | 49 | .align 16 50 | .globl syscall_mprotect 51 | syscall_mprotect: 52 | movl $__NR_mprotect, %eax 53 | syscall 54 | ret 55 | 56 | .globl base_syscall_end 57 | base_syscall_end: 58 | -------------------------------------------------------------------------------- /base/time.c: -------------------------------------------------------------------------------- 1 | /* 2 | * time.c - timekeeping utilities 3 | */ 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "init_internal.h" 12 | 13 | int cycles_per_us __aligned(CACHE_LINE_SIZE); 14 | uint64_t start_tsc; 15 | 16 | /** 17 | * __time_delay_us - spins the CPU for the specified delay 18 | * @us: the delay in microseconds 19 | */ 20 | void __time_delay_us(uint64_t us) 21 | { 22 | uint64_t cycles = us * cycles_per_us; 23 | unsigned long start = rdtsc(); 24 | 25 | while (rdtsc() - start < cycles) 26 | cpu_relax(); 27 | } 28 | 29 | /* derived from DPDK */ 30 | static int time_calibrate_tsc(void) 31 | { 32 | /* cycles_per_us may be provided in advance */ 33 | if (cycles_per_us) { 34 | start_tsc = rdtsc(); 35 | return 0; 36 | } 37 | 38 | /* TODO: New Intel CPUs report this value in CPUID */ 39 | struct timespec sleeptime = {.tv_nsec = 5E8 }; /* 1/2 second */ 40 | struct timespec t_start, t_end; 41 | 42 | cpu_serialize(); 43 | if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) { 44 | uint64_t ns, end, start; 45 | double secs; 46 | 47 | start = rdtsc(); 48 | nanosleep(&sleeptime, NULL); 49 | clock_gettime(CLOCK_MONOTONIC_RAW, &t_end); 50 | end = rdtscp(NULL); 51 | ns = ((t_end.tv_sec - t_start.tv_sec) * 1E9); 52 | ns += (t_end.tv_nsec - t_start.tv_nsec); 53 | 54 | secs = (double)ns / 1000; 55 | cycles_per_us = (uint64_t)((end - start) / secs); 56 | log_info("time: detected %d ticks / us", cycles_per_us); 57 | 58 | /* record the 
start time of the binary */ 59 | start_tsc = rdtsc(); 60 | return 0; 61 | } 62 | 63 | return -1; 64 | } 65 | 66 | /** 67 | * time_init - global time initialization 68 | * 69 | * Returns 0 if successful, otherwise fail. 70 | */ 71 | int time_init(void) 72 | { 73 | return time_calibrate_tsc(); 74 | } 75 | -------------------------------------------------------------------------------- /bindings/cc/.gitignore: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /bindings/cc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for C++ bindings 2 | ROOT_PATH=../.. 3 | include $(ROOT_PATH)/build/shared.mk 4 | 5 | # librt++.a - the c++ runtime library 6 | rt_src = runtime.cc thread.cc net.cc 7 | rt_obj = $(rt_src:.cc=.o) 8 | 9 | test_src = test.cc 10 | test_obj = $(test_src:.cc=.o) 11 | 12 | # must be first 13 | all: librt++.a test 14 | 15 | librt++.a: $(rt_obj) 16 | $(AR) rcs $@ $^ 17 | 18 | test: $(test_obj) librt++.a $(RUNTIME_DEPS) 19 | $(LDXX) $(LDFLAGS) -o $@ $(test_obj) librt++.a $(RUNTIME_LIBS) 20 | 21 | # general build rules for all targets 22 | src = $(rt_src) $(test_src) 23 | obj = $(src:.cc=.o) 24 | dep = $(obj:.o=.d) 25 | 26 | ifneq ($(MAKECMDGOALS),clean) 27 | -include $(dep) # include all dep files in the makefile 28 | endif 29 | 30 | # rule to generate a dep file by using the C preprocessor 31 | # (see man cpp for details on the -MM and -MT options) 32 | %.d: %.cc 33 | @$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@ 34 | %.o: %.cc 35 | $(CXX) $(CXXFLAGS) -c $< -o $@ 36 | 37 | .PHONY: clean 38 | clean: 39 | rm -f $(obj) $(dep) librt++.a test 40 | -------------------------------------------------------------------------------- /bindings/cc/net.cc: -------------------------------------------------------------------------------- 1 | #include "net.h" 2 | 3 | #include 4 | #include 5 | 
#include 6 | 7 | namespace { 8 | 9 | bool PullIOV(struct iovec **iovp, int *iovcntp, size_t n) { 10 | struct iovec *iov = *iovp; 11 | int iovcnt = *iovcntp, i; 12 | 13 | for (i = 0; i < iovcnt; ++i) { 14 | if (n < iov[i].iov_len) { 15 | iov[i].iov_base = reinterpret_cast(iov[i].iov_base) + n; 16 | iov[i].iov_len -= n; 17 | *iovp = &iov[i]; 18 | *iovcntp -= i; 19 | return true; 20 | } 21 | n -= iov[i].iov_len; 22 | } 23 | 24 | assert(n == 0); 25 | return false; 26 | } 27 | 28 | size_t SumIOV(const iovec *iov, int iovcnt) { 29 | size_t len = 0; 30 | for (int i = 0; i < iovcnt; ++i) len += iov[i].iov_len; 31 | return len; 32 | } 33 | 34 | } // namespace 35 | 36 | namespace rt { 37 | 38 | ssize_t TcpConn::WritevFullRaw(const iovec *iov, int iovcnt) { 39 | // first try to send without copying the vector 40 | ssize_t n = tcp_writev(c_, iov, iovcnt); 41 | if (n < 0) return n; 42 | assert(n > 0); 43 | 44 | // sum total length and check if everything was transferred 45 | size_t total = SumIOV(iov, iovcnt); 46 | if (static_cast(n) == total) return n; 47 | 48 | // partial transfer occurred, send the rest 49 | size_t len = n; 50 | std::unique_ptr v = std::unique_ptr{new iovec[iovcnt]}; 51 | iovec *iovp = v.get(); 52 | std::copy_n(iov, iovcnt, iovp); 53 | while (PullIOV(&iovp, &iovcnt, n)) { 54 | n = tcp_writev(c_, iovp, iovcnt); 55 | if (n < 0) return n; 56 | assert(n > 0); 57 | len += n; 58 | } 59 | 60 | assert(len == total); 61 | return len; 62 | } 63 | 64 | ssize_t TcpConn::ReadvFullRaw(const iovec *iov, int iovcnt) { 65 | // first try to receive without copying the vector 66 | ssize_t n = tcp_readv(c_, iov, iovcnt); 67 | if (n <= 0) return n; 68 | 69 | // sum total length and check if everything was transferred 70 | size_t total = SumIOV(iov, iovcnt); 71 | if (static_cast(n) == total) return n; 72 | 73 | // partial transfer occurred, receive the rest 74 | size_t len = n; 75 | std::unique_ptr v = std::unique_ptr{new iovec[iovcnt]}; 76 | iovec *iovp = v.get(); 77 | 
std::copy_n(iov, iovcnt, iovp); 78 | while (PullIOV(&iovp, &iovcnt, n)) { 79 | n = tcp_readv(c_, iovp, iovcnt); 80 | if (n <= 0) return n; 81 | len += n; 82 | } 83 | 84 | assert(len == total); 85 | return len; 86 | } 87 | 88 | } // namespace rt 89 | -------------------------------------------------------------------------------- /bindings/cc/runtime.cc: -------------------------------------------------------------------------------- 1 | #include "runtime.h" 2 | 3 | #include "thread.h" 4 | 5 | namespace rt { 6 | 7 | // initializes the runtime 8 | int RuntimeInit(std::string cfg_path, std::function main_func) { 9 | auto *func_copy = new std::function(main_func); 10 | return runtime_init(cfg_path.c_str(), thread_internal::ThreadTrampoline, 11 | reinterpret_cast(func_copy)); 12 | } 13 | 14 | } // namespace rt 15 | -------------------------------------------------------------------------------- /bindings/cc/runtime.h: -------------------------------------------------------------------------------- 1 | // the main header for Shenango's runtime 2 | 3 | #pragma once 4 | 5 | extern "C" { 6 | #include 7 | } 8 | 9 | #include 10 | #include 11 | 12 | namespace rt { 13 | 14 | // The highest number of cores supported. 15 | constexpr unsigned int kCoreLimit = NCPU; 16 | 17 | // Initializes the runtime. If successful, calls @main_func and does not return. 18 | int RuntimeInit(std::string cfg_path, std::function main_func); 19 | 20 | // Gets the queueing delay of runqueue (thread queue) + packet queue 21 | inline uint64_t RuntimeQueueUS() { return runtime_queue_us(); } 22 | 23 | // Gets an estimate of the instantaneous load as measured by the IOKernel. 24 | inline float RuntimeLoad() { return runtime_load(); } 25 | 26 | // Gets the current number of active cores 27 | inline unsigned int RuntimeActiveCores() { return runtime_active_cores(); } 28 | 29 | // Gets the maximum number of cores the runtime could run on. 
30 | inline unsigned int RuntimeMaxCores() { return runtime_max_cores(); } 31 | 32 | // Gets the guaranteed number of cores the runtime will at least get. 33 | inline unsigned int RuntimeGuaranteedCores() { 34 | return runtime_guaranteed_cores(); 35 | } 36 | 37 | }; // namespace rt 38 | -------------------------------------------------------------------------------- /bindings/cc/storage.h: -------------------------------------------------------------------------------- 1 | // storage.h - support for flash storage 2 | 3 | #pragma once 4 | 5 | extern "C" { 6 | #include 7 | } 8 | 9 | // TODO: this should be per-device. 10 | class Storage { 11 | public: 12 | // Write contiguous storage blocks. 13 | static int Write(const void *src, uint64_t lba, uint32_t lba_count) { 14 | return storage_write(src, lba, lba_count); 15 | } 16 | 17 | // Read contiguous storage blocks. 18 | static int Read(void *dst, uint64_t lba, uint32_t lba_count) { 19 | return storage_read(dst, lba, lba_count); 20 | } 21 | 22 | // Returns the size of each block. 23 | static uint32_t get_block_size() { return storage_block_size(); } 24 | 25 | // Returns the capacity of the device in blocks. 26 | static uint64_t get_num_blocks() { return storage_num_blocks(); } 27 | }; 28 | -------------------------------------------------------------------------------- /bindings/cc/test.cc: -------------------------------------------------------------------------------- 1 | extern "C" { 2 | #include 3 | #include 4 | } 5 | 6 | #include 7 | 8 | #include "runtime.h" 9 | #include "thread.h" 10 | #include "timer.h" 11 | 12 | namespace { 13 | 14 | constexpr int kTestValue = 10; 15 | 16 | void foo(int arg) { 17 | if (arg != kTestValue) BUG(); 18 | } 19 | 20 | void MainHandler() { 21 | std::string str = "captured!"; 22 | int i = kTestValue; 23 | int j = kTestValue; 24 | 25 | rt::Spawn([=] { 26 | log_info("hello from ThreadSpawn()! 
'%s'", str.c_str()); 27 | foo(i); 28 | }); 29 | 30 | rt::Spawn([&] { 31 | log_info("hello from ThreadSpawn()! '%s'", str.c_str()); 32 | foo(i); 33 | j *= 2; 34 | }); 35 | 36 | rt::Yield(); 37 | if (j != kTestValue * 2) BUG(); 38 | 39 | rt::Sleep(1 * rt::kMilliseconds); 40 | 41 | auto th = rt::Thread([&] { 42 | log_info("hello from rt::Thread! '%s'", str.c_str()); 43 | foo(i); 44 | }); 45 | th.Join(); 46 | } 47 | 48 | } // anonymous namespace 49 | 50 | int main(int argc, char *argv[]) { 51 | int ret; 52 | 53 | if (argc < 2) { 54 | printf("arg must be config file\n"); 55 | return -EINVAL; 56 | } 57 | 58 | ret = rt::RuntimeInit(argv[1], MainHandler); 59 | if (ret) { 60 | log_err("failed to start runtime"); 61 | return ret; 62 | } 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /bindings/cc/timer.h: -------------------------------------------------------------------------------- 1 | // timer.h - support for timers 2 | 3 | #pragma once 4 | 5 | extern "C" { 6 | #include 7 | #include 8 | } 9 | 10 | namespace rt { 11 | 12 | static constexpr uint64_t kMilliseconds = 1000; 13 | static constexpr uint64_t kSeconds = 1000000; 14 | 15 | // Gets the current number of microseconds since the launch of the runtime. 16 | inline uint64_t MicroTime() { return microtime(); } 17 | 18 | // Busy-spins for a microsecond duration. 19 | inline void Delay(uint64_t us) { delay_us(us); } 20 | 21 | // Sleeps until a microsecond deadline. 22 | inline void SleepUntil(uint64_t deadline_us) { timer_sleep_until(deadline_us); } 23 | 24 | // Sleeps for a microsecond duration. 
25 | inline void Sleep(uint64_t duration_us) { timer_sleep(duration_us); } 26 | 27 | } // namespace rt 28 | -------------------------------------------------------------------------------- /bindings/rust/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /bindings/rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "shenango" 3 | version = "0.1.1" 4 | authors = ["The Shenango Developers"] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | libc = "0.2" 9 | byteorder = "1.5" 10 | 11 | [build-dependencies] 12 | bindgen = "0.69.1" 13 | build-deps = "0.1.4" 14 | proc-macro2 = "1.0.66" 15 | anyhow = "1.0.86" 16 | 17 | [[bin]] 18 | name = "runtime_threads" 19 | path = "src/test_runtime_threads.rs" 20 | 21 | [[bin]] 22 | name = "runtime_joinhandle" 23 | path = "src/test_runtime_joinhandle.rs" 24 | 25 | [[bin]] 26 | name = "hello" 27 | path = "src/test_hello.rs" 28 | 29 | [[bin]] 30 | name = "smalloc" 31 | path = "src/test_smalloc.rs" 32 | -------------------------------------------------------------------------------- /bindings/rust/rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "nightly" 3 | targets = [ "x86_64-unknown-linux" ] 4 | -------------------------------------------------------------------------------- /bindings/rust/shenango.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | -------------------------------------------------------------------------------- /bindings/rust/src/asm.rs: 
-------------------------------------------------------------------------------- 1 | #[inline] 2 | pub fn cpu_relax() { 3 | unsafe { core::arch::x86_64::_mm_pause() } 4 | } 5 | 6 | #[inline] 7 | pub fn cpu_serialize() { 8 | unsafe { 9 | core::arch::x86_64::__cpuid(0); 10 | } 11 | } 12 | 13 | #[inline] 14 | pub fn rdtsc() -> u64 { 15 | unsafe { core::arch::x86_64::_rdtsc() } 16 | } 17 | 18 | #[inline] 19 | pub fn rdtscp() -> (u64, u32) { 20 | let mut aux: u32 = 0; 21 | let tsc = unsafe { core::arch::x86_64::__rdtscp(&mut aux as *mut u32) }; 22 | (tsc, aux) 23 | } 24 | -------------------------------------------------------------------------------- /bindings/rust/src/ffi.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | 5 | include!(concat!(env!("OUT_DIR"), "/bindings.rs")); 6 | -------------------------------------------------------------------------------- /bindings/rust/src/storage.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, ErrorKind, Result}; 2 | 3 | use super::*; 4 | 5 | extern "C" { 6 | #[link_name = "block_size"] 7 | static block_size: ffi::u_int32_t; 8 | 9 | #[link_name = "num_blocks"] 10 | static num_blocks: ffi::u_int64_t; 11 | } 12 | 13 | pub fn storage_block_size() -> Result { 14 | let bsize = unsafe { block_size }; 15 | if bsize == 0 { 16 | return Err(Error::new(ErrorKind::Other, "storage not enabled")); 17 | } 18 | Ok(bsize as usize) 19 | } 20 | 21 | pub fn storage_num_blocks() -> Result { 22 | let nblocks = unsafe { num_blocks }; 23 | if nblocks == 0 { 24 | return Err(Error::new(ErrorKind::Other, "storage not enabled")); 25 | } 26 | Ok(nblocks as usize) 27 | } 28 | 29 | pub fn storage_read(buf: &mut [u8], lba: u64) -> Result { 30 | let bsize = storage_block_size()?; 31 | let nblocks = buf.len() / bsize; 32 | let res = unsafe { 
ffi::storage_read(buf.as_mut_ptr() as *mut c_void, lba, nblocks as u32) }; 33 | if res < 0 { 34 | Err(Error::from_raw_os_error(-res)) 35 | } else { 36 | Ok((nblocks * bsize) as usize) 37 | } 38 | } 39 | 40 | pub fn storage_write(buf: &[u8], lba: u64) -> Result { 41 | let bsize = storage_block_size()?; 42 | let nblocks = buf.len() / bsize; 43 | let res = unsafe { ffi::storage_write(buf.as_ptr() as *const c_void, lba, nblocks as u32) }; 44 | if res < 0 { 45 | Err(Error::from_raw_os_error(-res)) 46 | } else { 47 | Ok((nblocks * bsize) as usize) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /bindings/rust/src/test_hello.rs: -------------------------------------------------------------------------------- 1 | extern crate shenango; 2 | 3 | fn main() { 4 | shenango::base_init().unwrap(); 5 | shenango::base_init_thread().unwrap(); 6 | } 7 | -------------------------------------------------------------------------------- /bindings/rust/src/test_runtime_joinhandle.rs: -------------------------------------------------------------------------------- 1 | extern crate shenango; 2 | 3 | use shenango::WaitGroup; 4 | use std::sync::Arc; 5 | 6 | const N: usize = 50000; 7 | const NCORES: usize = 3; 8 | 9 | fn main_handler() { 10 | println!("started main_handler() thread"); 11 | println!("creating threads with 1us of fake work."); 12 | 13 | let wg = Arc::new(WaitGroup::new()); 14 | wg.add(NCORES as i32); 15 | 16 | let start_us = shenango::microtime(); 17 | 18 | let mut join_handles = Vec::new(); 19 | for _ in 0..NCORES { 20 | join_handles.push(shenango::thread::spawn(|| { 21 | for _ in 0..N { 22 | shenango::thread::spawn(|| shenango::delay_us(1)) 23 | .join() 24 | .unwrap(); 25 | } 26 | })); 27 | } 28 | 29 | for j in join_handles { 30 | j.join().unwrap(); 31 | } 32 | 33 | let threads_per_second = 34 | (NCORES * N) as f64 / ((shenango::microtime() - start_us) as f64 * 0.000001); 35 | println!( 36 | "spawned {} threads / second, 
efficiency {}", 37 | threads_per_second, 38 | 0.000001 * threads_per_second / NCORES as f64 39 | ); 40 | } 41 | 42 | fn main() { 43 | let args: Vec<_> = ::std::env::args().collect(); 44 | assert!(args.len() >= 2, "arg must be config file"); 45 | shenango::runtime_init(args[1].clone(), main_handler).unwrap(); 46 | } 47 | -------------------------------------------------------------------------------- /bindings/rust/src/test_runtime_threads.rs: -------------------------------------------------------------------------------- 1 | extern crate shenango; 2 | 3 | use shenango::WaitGroup; 4 | use std::sync::Arc; 5 | 6 | const N: usize = 1000000; 7 | const NCORES: usize = 4; 8 | 9 | fn leaf_handler(wg_parent: Arc) { 10 | shenango::delay_us(1); 11 | wg_parent.done(); 12 | } 13 | 14 | fn work_handler(wg_parent: Arc) { 15 | let wg = Arc::new(WaitGroup::new()); 16 | wg.add(N as i32); 17 | for _ in 0..N { 18 | let wg2 = wg.clone(); 19 | shenango::thread::spawn_detached(move || leaf_handler(wg2)); 20 | shenango::thread::thread_yield(); 21 | } 22 | 23 | wg.wait(); 24 | wg_parent.done(); 25 | } 26 | 27 | fn main_handler() { 28 | println!("started main_handler() thread"); 29 | println!("creating threads with 1us of fake work."); 30 | 31 | let wg = Arc::new(WaitGroup::new()); 32 | wg.add(NCORES as i32); 33 | 34 | let start_us = shenango::microtime(); 35 | for _ in 0..NCORES { 36 | let wg2 = wg.clone(); 37 | shenango::thread::spawn_detached(move || work_handler(wg2)); 38 | } 39 | 40 | wg.wait(); 41 | 42 | let threads_per_second = 43 | (NCORES * N) as f64 / ((shenango::microtime() - start_us) as f64 * 0.000001); 44 | println!( 45 | "spawned {} threads / second, efficiency {}", 46 | threads_per_second, 47 | threads_per_second / (NCORES * N) as f64 48 | ); 49 | } 50 | 51 | fn main() { 52 | let args: Vec<_> = ::std::env::args().collect(); 53 | assert!(args.len() >= 2, "arg must be config file"); 54 | shenango::runtime_init(args[1].clone(), main_handler).unwrap(); 55 | } 56 | 
-------------------------------------------------------------------------------- /breakwater/Makefile: -------------------------------------------------------------------------------- 1 | ROOT_PATH=.. 2 | include $(ROOT_PATH)/build/shared.mk 3 | 4 | CFLAGS += -I$(ROOT_PATH)/breakwater/inc 5 | 6 | # breakwater - RPC layer with server overload control 7 | breakwater_src = $(wildcard src/*.c) 8 | breakwater_obj = $(breakwater_src:.c=.o) 9 | 10 | all: libbw.a 11 | 12 | libbw.a: $(breakwater_obj) 13 | $(AR) rcs $@ $^ 14 | 15 | # general build rules for all targets 16 | src = $(breakwater_src) 17 | obj = $(src:.c=.o) 18 | dep = $(obj:.o=.d) 19 | 20 | ifneq ($(MAKECMDGOALS),clean) 21 | -include $(dep) # include all dep files in the makefile 22 | endif 23 | 24 | # rule to generate a dep file by using the C preprocessor 25 | %.d: %.c 26 | @$(CC) $(CFLAGS) $< -MM -MT $(@:.d=.o) >$@ 27 | %.o: %.c 28 | @$(CC) $(CFLAGS) -c $< -o $@ 29 | 30 | .PHONY: clean 31 | clean: 32 | rm -f $(obj) $(dep) libbw.a 33 | -------------------------------------------------------------------------------- /breakwater/apps/netbench/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for netbench 2 | ROOT_PATH=../../.. 
3 | include $(ROOT_PATH)/build/shared.mk 4 | 5 | BW_LIBS = $(ROOT_PATH)/breakwater/libbw.a 6 | 7 | lib_src = synthetic_worker.cc util.cc 8 | lib_obj = $(lib_src:.cc=.o) 9 | 10 | netbench_src = netbench.cc 11 | netbench_obj = $(netbench_src:.cc=.o) 12 | 13 | librt_libs = $(ROOT_PATH)/bindings/cc/librt++.a 14 | libbw_libs = $(ROOT_PATH)/breakwater/bindings/cc/libbw++.a 15 | INC += -I$(ROOT_PATH)/breakwater/inc 16 | INC += -I$(ROOT_PATH)/bindings 17 | INC += -I$(ROOT_PATH)/breakwater/bindings/cc/inc 18 | 19 | RUNTIME_LIBS := $(RUNTIME_LIBS) $(BW_LIBS) -lnuma 20 | 21 | # must be first 22 | all: netbench 23 | 24 | netbench: $(lib_obj) $(netbench_obj) $(librt_libs) $(libbw_libs) $(RUNTIME_DEPS) 25 | $(LDXX) -o $@ $(LDFLAGS) $(lib_obj) $(netbench_obj) \ 26 | $(librt_libs) $(libbw_libs) $(RUNTIME_LIBS) 27 | 28 | # general build rules for all targets 29 | src = $(lib_src) $(netbench_src) 30 | obj = $(src:.cc=.o) 31 | dep = $(obj:.o=.d) 32 | 33 | ifneq ($(MAKECMDGOALS),clean) 34 | -include $(dep) # include all dep files in the makefile 35 | endif 36 | 37 | # rule to generate a dep file by using the C preprocessor 38 | # (see man cpp for details on the -MM and -MT options) 39 | %.d: %.cc 40 | @$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@ 41 | %.o: %.cc 42 | $(CXX) $(CXXFLAGS) -c $< -o $@ 43 | 44 | .PHONY: clean 45 | clean: 46 | rm -f $(obj) $(dep) netbench 47 | -------------------------------------------------------------------------------- /breakwater/apps/netbench/client.config: -------------------------------------------------------------------------------- 1 | # an example runtime config file 2 | host_addr 192.168.1.107 3 | host_netmask 255.255.255.0 4 | host_gateway 192.168.1.1 5 | runtime_kthreads 16 6 | runtime_spinning_kthreads 16 7 | enable_directpath 1 8 | -------------------------------------------------------------------------------- /breakwater/apps/netbench/server.config: -------------------------------------------------------------------------------- 1 | # an 
example runtime config file 2 | host_addr 192.168.1.103 3 | host_netmask 255.255.255.0 4 | host_gateway 192.168.1.1 5 | runtime_kthreads 10 6 | enable_directpath 1 7 | -------------------------------------------------------------------------------- /breakwater/apps/netbench/util.cc: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | std::vector split(const std::string &text, char sep) { 4 | std::vector tokens; 5 | std::string::size_type start = 0, end = 0; 6 | while ((end = text.find(sep, start)) != std::string::npos) { 7 | tokens.push_back(text.substr(start, end - start)); 8 | start = end + 1; 9 | } 10 | tokens.push_back(text.substr(start)); 11 | return tokens; 12 | } 13 | -------------------------------------------------------------------------------- /breakwater/apps/netbench/util.h: -------------------------------------------------------------------------------- 1 | // util.h - a collection of shared utilities 2 | 3 | #pragma once 4 | 5 | #include "cc/timer.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std::chrono; 13 | 14 | struct work_unit { 15 | double start_us, work_us, duration_us; 16 | int cpu; 17 | }; 18 | 19 | template 20 | std::vector GenerateWork(Arrival a, Service s, double cur_us, 21 | double last_us, int cpu) { 22 | std::vector w; 23 | while (cur_us < last_us) { 24 | cur_us += a(); 25 | w.emplace_back(work_unit{cur_us, s(), 0, cpu}); 26 | } 27 | return w; 28 | } 29 | 30 | template 31 | std::vector GenerateWork(Arrival a, Service *s, double cur_us, 32 | double last_us, int cpu) { 33 | std::vector w; 34 | while (cur_us < last_us) { 35 | cur_us += a(); 36 | w.emplace_back(work_unit{cur_us, (*s)(), 0, cpu}); 37 | } 38 | return w; 39 | } 40 | 41 | std::vector split(const std::string &text, char sep); 42 | 43 | class Timer { 44 | public: 45 | using micro = duration; 46 | 47 | Timer() { 48 | barrier(); 49 | start_ts_ = steady_clock::now(); 50 | barrier(); 
51 | } 52 | ~Timer(){}; 53 | 54 | // Reset the timer start time. 55 | void Reset() { 56 | barrier(); 57 | start_ts_ = steady_clock::now(); 58 | barrier(); 59 | } 60 | 61 | // Returns the microseconds elapsed since the timer was constructed or last reset. 62 | double Elapsed() { 63 | barrier(); 64 | auto now = steady_clock::now(); 65 | barrier(); 66 | return duration_cast(now - start_ts_).count(); 67 | } 68 | 69 | // Busy spin until the deadline (in microseconds) passes. 70 | void SpinUntil(double deadline) { 71 | while (Elapsed() < deadline) cpu_relax(); 72 | } 73 | 74 | // Sleep until the deadline (in microseconds) passes. 75 | void SleepUntil(double deadline) { 76 | double diff = deadline - Elapsed(); 77 | if (diff <= 0) return; 78 | rt::Sleep(static_cast(diff)); 79 | } 80 | 81 | private: 82 | time_point start_ts_; 83 | }; 84 | -------------------------------------------------------------------------------- /breakwater/bindings/cc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for C++ bindings 2 | ROOT_PATH=../../.. 
3 | include $(ROOT_PATH)/build/shared.mk 4 | 5 | CXXFLAGS += -I$(ROOT_PATH)/breakwater/inc 6 | CXXFLAGS += -I$(ROOT_PATH)/breakwater/bindings/cc/inc 7 | 8 | # libbw++.a - the c++ breakwater library 9 | bw_src = rpc++.cc 10 | bw_obj = $(bw_src:.cc=.o) 11 | 12 | all: libbw++.a 13 | 14 | libbw++.a: $(bw_obj) 15 | $(AR) rcs $@ $^ 16 | 17 | # general build rules for all targets 18 | src = $(bw_src) 19 | obj = $(src:.cc=.o) 20 | dep = $(obj:.o=.d) 21 | 22 | ifneq ($(MAKECMDGOALS),clean) 23 | -include $(dep) # include all dep files in the makefile 24 | endif 25 | 26 | # rule to generate a dep file by using the C preprocessor 27 | %.d: %.cc 28 | @$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@ 29 | %.o: %.cc 30 | $(CXX) $(CXXFLAGS) -c $< -o $@ 31 | 32 | .PHONY: clean 33 | clean: 34 | rm -f $(obj) $(dep) libbw++.a 35 | -------------------------------------------------------------------------------- /breakwater/bindings/cc/inc/breakwater/rpc++.h: -------------------------------------------------------------------------------- 1 | // rpc++.h - support for remote procedure calls (RPCs) 2 | 3 | #pragma once 4 | 5 | extern "C" { 6 | #include 7 | #include 8 | } 9 | 10 | #include 11 | 12 | namespace rpc { 13 | 14 | class RpcClient { 15 | public: 16 | // The maximum size of an RPC request payload. 17 | static constexpr size_t kMaxPayloadSize = SRPC_BUF_SIZE; 18 | 19 | // Disable move and copy. 20 | RpcClient(const RpcClient&) = delete; 21 | RpcClient& operator=(const RpcClient&) = delete; 22 | 23 | // Creates an RPC session. 24 | static RpcClient *Dial(netaddr raddr, int id); 25 | 26 | // Sends an RPC request. 27 | ssize_t Send(const void *buf, size_t len, int hash); 28 | 29 | // Receives an RPC response. 
30 | ssize_t Recv(void *buf, size_t len, uint64_t *latency); 31 | 32 | uint32_t WinAvail(); 33 | 34 | void StatClear(); 35 | 36 | uint64_t StatWinuRx(); 37 | 38 | uint64_t StatWinuTx(); 39 | 40 | uint64_t StatRespRx(); 41 | 42 | uint64_t StatReqTx(); 43 | 44 | uint64_t StatWinExpired(); 45 | 46 | uint64_t StatReqDropped(); 47 | 48 | // Shuts down the RPC connection. 49 | int Shutdown(int how); 50 | // Aborts the RPC connection. 51 | void Abort(); 52 | 53 | void Close(); 54 | 55 | private: 56 | RpcClient(struct crpc_session *s) : s_(s) { } 57 | 58 | // The client session object. 59 | struct crpc_session *s_; 60 | }; 61 | 62 | // Enables the RPC server, listening for new sessions. 63 | // Can only be called once. 64 | int RpcServerEnable(std::function f); 65 | 66 | uint64_t RpcServerStatWinuRx(); 67 | uint64_t RpcServerStatWinuTx(); 68 | uint64_t RpcServerStatWinTx(); 69 | uint64_t RpcServerStatReqRx(); 70 | uint64_t RpcServerStatReqDropped(); 71 | uint64_t RpcServerStatRespTx(); 72 | } // namespace rpc 73 | -------------------------------------------------------------------------------- /breakwater/inc/breakwater/breakwater.h: -------------------------------------------------------------------------------- 1 | /* 2 | * breakwater.h - breakwater implementation for RPC layer 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "rpc.h" 12 | 13 | /* for RPC server */ 14 | 15 | struct sbw_ctx { 16 | struct srpc_ctx cmn; 17 | uint64_t ts_sent; 18 | bool drop; 19 | }; 20 | 21 | /* for RPC client */ 22 | struct cbw_session { 23 | struct crpc_session cmn; 24 | uint64_t id; 25 | uint64_t req_id; 26 | mutex_t lock; 27 | waitgroup_t timer_waiter; 28 | bool waiting_winupdate; 29 | uint32_t win_avail; 30 | uint32_t win_used; 31 | bool running; 32 | bool demand_sync; 33 | condvar_t timer_cv; 34 | bool init; 35 | 36 | /* a queue of pending RPC requests */ 37 | uint32_t head; 38 | uint32_t tail; 39 | struct crpc_ctx *qreq[CRPC_QLEN]; 40 | 41 | /* 
client-side stats */ 42 | uint64_t winu_rx_; 43 | uint64_t winu_tx_; 44 | uint64_t resp_rx_; 45 | uint64_t req_tx_; 46 | uint64_t win_expired_; 47 | uint64_t req_dropped_; 48 | }; 49 | -------------------------------------------------------------------------------- /breakwater/inc/breakwater/dagor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * dagor.h - DAGOR implementation for RPC layer 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "rpc.h" 12 | 13 | /* for RPC server */ 14 | 15 | struct sdg_ctx { 16 | struct srpc_ctx cmn; 17 | uint64_t ts_sent; 18 | bool drop; 19 | }; 20 | 21 | /* for RPC client */ 22 | 23 | struct cdg_ctx { 24 | struct crpc_ctx cmn; 25 | int prio; 26 | }; 27 | 28 | struct cdg_session { 29 | struct crpc_session cmn; 30 | uint64_t id; 31 | uint64_t req_id; 32 | int local_prio; 33 | mutex_t lock; 34 | bool running; 35 | condvar_t sender_cv; 36 | waitgroup_t sender_waiter; 37 | 38 | /* a queue of pending RPC requests */ 39 | uint32_t head; 40 | uint32_t tail; 41 | struct cdg_ctx *qreq[CRPC_QLEN]; 42 | 43 | /* client-side stats */ 44 | uint64_t winu_rx_; 45 | uint64_t winu_tx_; 46 | uint64_t resp_rx_; 47 | uint64_t req_tx_; 48 | uint64_t win_expired_; 49 | uint64_t req_dropped_; 50 | }; 51 | -------------------------------------------------------------------------------- /breakwater/inc/breakwater/nocontrol.h: -------------------------------------------------------------------------------- 1 | /* 2 | * nocontrol.h - No server overload implementation 3 | * for RPC layer 4 | */ 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include "rpc.h" 12 | 13 | /* for RPC server */ 14 | 15 | struct snc_ctx { 16 | struct srpc_ctx cmn; 17 | uint64_t ts; 18 | }; 19 | 20 | /* for RPC client */ 21 | struct cnc_session { 22 | struct crpc_session cmn; 23 | uint64_t id; 24 | uint64_t req_id; 25 | mutex_t lock; 26 | bool running; 27 | condvar_t sender_cv; 28 | 
#!/bin/bash
# run with sudo
#
# All relative paths below are resolved against the parent of the directory
# this script is started from (see the pushd).

pushd .. || exit 1

# Build ksched.ko
# The build runs in a subshell so that a failing step cannot leave the rest of
# the script executing from inside ksched/; a failed build aborts the setup
# instead of silently continuing with a stale or missing module.
if ! (cd ksched && make clean && make); then
	echo "setup_machine.sh: building ksched failed" >&2
	popd
	exit 1
fi

# Shenango setup
./scripts/setup_machine.sh

# turn on cstate
killall cstate
# Subshell again: the caller's working directory is unaffected even when
# scripts/ is missing, so the steps below always run from the same place.
(
	cd scripts || exit 1
	gcc cstate.c -o cstate
	./cstate 0 &
)

# Disable frequency scaling
echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

# Disable turbo boost
echo 1 | tee /sys/devices/system/cpu/intel_pstate/no_turbo

popd
/*
 * header used for CLIENT -> SERVER
 *
 * NOTE(review): len is a size_t and the struct is not declared packed, so the
 * layout follows the ABI of whichever build produced it -- confirm both peers
 * are always built for the same ABI.
 */
struct cbw_hdr {
	uint32_t	magic;	/* must be set to BW_REQ_MAGIC */
	uint32_t	op;	/* the opcode (BW_OP_*) */
	size_t		len;	/* length of request in bytes */
	uint64_t	id;	/* Request / Response ID */
	uint64_t	demand;	/* the demanded window size */
	uint64_t	ts_sent;
	uint8_t		flags;	/* NOTE(review): presumably BW_CFLAG_* bits -- confirm */
};
/*
 * header used for CLIENT -> SERVER
 *
 * NOTE(review): len is a size_t and the struct is not declared packed, so the
 * layout follows the ABI of whichever build produced it -- confirm both peers
 * are always built for the same ABI.
 */
struct cdg_hdr {
	uint32_t	magic;	/* must be set to DG_REQ_MAGIC */
	uint32_t	op;	/* the opcode (DG_OP_*) */
	size_t		len;	/* length of request in bytes */
	uint64_t	id;	/* Request / Response ID */
	int		prio;	/* the request's priority (NOTE(review): presumably bounded by DG_MAX_PRIO -- confirm) */
	uint64_t	ts_sent;
};
/*
 * header used for CLIENT -> SERVER
 *
 * NOTE(review): len is a size_t and the struct is not declared packed, so the
 * layout follows the ABI of whichever build produced it -- confirm both peers
 * are always built for the same ABI.
 */
struct cnc_hdr {
	uint32_t	magic;	/* must be set to NC_REQ_MAGIC */
	uint32_t	op;	/* the opcode (NC_OP_*) */
	size_t		len;	/* length of request in bytes */
	uint64_t	id;	/* Request / Response ID */
	uint64_t	ts;	/* NOTE(review): presumably a timestamp set by the sender -- confirm */
};
#define SEDA_TARGET 80 22 | * #define SEDA_ADJ_I 40 23 | * #define SEDA_ADJ_D 1.04 24 | * 25 | * - 100 us average service time 26 | * #define CSD_MAX_CLIENT_DELAY_US 100 27 | * #define CSD_TB_INIT_RATE 4 28 | * #define CSD_TB_MIN_RATE 2 29 | * #define SEDA_TARGET 720 30 | * #define SEDA_ADJ_I 10 31 | * #define SEDA_ADJ_D 1.3 32 | */ 33 | 34 | /* maximum client delay */ 35 | #define CSD_MAX_CLIENT_DELAY_US 100 36 | /* Token bucket initial rate (reqs/sec) */ 37 | #define CSD_TB_INIT_RATE 4 38 | /* Token bucket minimum rate (reqs/sec) */ 39 | #define CSD_TB_MIN_RATE 2 40 | /* Token bucket maximum number of token (burstiness) */ 41 | #define CSD_TB_MAX_TOKEN 4 42 | /* EWMA filter constant */ 43 | #define SEDA_ALPHA 0.7 44 | /* target 90th percentile delay */ 45 | #define SEDA_TARGET 80 46 | /* time before controller run */ 47 | #define SEDA_TIMEOUT 1000 48 | /* % error to trigger decrease */ 49 | #define SEDA_ERR_D 0.0 50 | /* % error to trigger increase */ 51 | #define SEDA_ERR_I -0.5 52 | /* additive rate increase */ 53 | #define SEDA_ADJ_I 4.0 54 | /* multiplicative rate decrease */ 55 | #define SEDA_ADJ_D 1.1 56 | /* weight on additive increase */ 57 | #define SEDA_CI -0.1 58 | -------------------------------------------------------------------------------- /breakwater/src/sd_proto.h: -------------------------------------------------------------------------------- 1 | /* 2 | * proto.h - RPC protocol definitions for SEDA 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #define SD_REQ_MAGIC 0x63727063 /* 'crpc' */ 10 | #define SD_RESP_MAGIC 0x73727063 /* 'srpc' */ 11 | 12 | enum { 13 | SD_OP_CALL = 0, /* performs a procedure call */ 14 | SD_OP_WINUPDATE, /* just updates the window (no call) */ 15 | SD_OP_MAX, /* maximum number of opcodes */ 16 | }; 17 | 18 | /* header used for CLIENT -> SERVER */ 19 | struct csd_hdr { 20 | uint32_t magic; /* must be set to RPC_REQ_MAGIC */ 21 | uint32_t op; /* the opcode */ 22 | size_t len; /* length of request in bytes */ 23 | 
uint64_t id; /* Request / Response ID */ 24 | uint64_t ts; 25 | }; 26 | 27 | /* header used for SERVER -> CLIENT */ 28 | struct ssd_hdr { 29 | uint32_t magic; /* must be set to RPC_RESP_MAGIC */ 30 | uint32_t op; /* the opcode */ 31 | size_t len; /* length of response in bytes */ 32 | uint64_t id; /* Request / Response ID */ 33 | uint64_t ts; 34 | }; 35 | -------------------------------------------------------------------------------- /breakwater/src/util.c: -------------------------------------------------------------------------------- 1 | /* 2 | * util.c - utility functions for RPC 3 | */ 4 | 5 | #include "util.h" 6 | 7 | /** 8 | * tcp_read_full - reads exactly the requested bytes or fails 9 | * @c: the TCP connection to read from 10 | * @buf: the buffer to store the read 11 | * @len: the exact length of the read 12 | * 13 | * Returns @len bytes or <= 0 if there was an error. 14 | */ 15 | ssize_t tcp_read_full(tcpconn_t *c, void *buf, size_t len) 16 | { 17 | char *pos = buf; 18 | size_t n = 0; 19 | 20 | while (n < len) { 21 | ssize_t ret = tcp_read(c, pos + n, len - n); 22 | if (ret <= 0) 23 | return ret; 24 | n += ret; 25 | } 26 | 27 | assert(n == len); 28 | return n; 29 | } 30 | 31 | /** 32 | * tcp_write_full - writes exactly the requested bytes or fails 33 | * @c: the TCP connection to write to 34 | * @buf: the buffer to write to the socket 35 | * @len: the exact length of the write 36 | * 37 | * Returns @len bytes or < 0 if there was an error. 
/*
 * pull_iov - advances a scatter-gather array past @n already-written bytes
 * @iovp: in/out pointer to the first entry that still has data
 * @iovcntp: in/out number of entries starting at *@iovp
 * @n: the number of bytes to skip
 *
 * Entries that were written completely are skipped; the first entry that was
 * only partially written is trimmed in place and becomes the new *@iovp.
 *
 * Returns true if unwritten data remains, false if @n covered the entire
 * array (in which case *@iovp and *@iovcntp are left untouched).
 */
static bool pull_iov(struct iovec **iovp, int *iovcntp, size_t n)
{
	struct iovec *cur = *iovp;
	int left = *iovcntp;

	/* step over every entry that @n covers completely */
	while (left > 0 && n >= cur->iov_len) {
		n -= cur->iov_len;
		cur++;
		left--;
	}

	/* nothing left: @n must have matched the total length exactly */
	if (left <= 0) {
		assert (n == 0);
		return false;
	}

	/* trim the consumed prefix off the first unfinished entry */
	cur->iov_base = (char *)cur->iov_base + n;
	cur->iov_len -= n;
	*iovp = cur;
	*iovcntp = left;
	return true;
}
86 | */ 87 | ssize_t tcp_writev_full(tcpconn_t *c, struct iovec *iov, int iovcnt) 88 | { 89 | ssize_t n, len = 0; 90 | 91 | do { 92 | n = tcp_writev(c, iov, iovcnt); 93 | if (n < 0) 94 | return n; 95 | assert(n > 0); 96 | len += n; 97 | } while (pull_iov(&iov, &iovcnt, n)); 98 | 99 | return len; 100 | } 101 | -------------------------------------------------------------------------------- /breakwater/src/util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * util.h - utility functions for RPC 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | extern ssize_t tcp_read_full(tcpconn_t *c, void *buf, size_t len); 11 | extern ssize_t tcp_write_full(tcpconn_t *c, const void *buf, size_t len); 12 | extern ssize_t tcp_writev_full(tcpconn_t *c, struct iovec *iov, int iovcnt); 13 | -------------------------------------------------------------------------------- /build/config: -------------------------------------------------------------------------------- 1 | # build configuration options (set to y for "yes", n for "no") 2 | 3 | # Enable Mellanox ConnectX-4,5 NIC Support 4 | CONFIG_MLX5=n 5 | # Enable SPDK NVMe support 6 | CONFIG_SPDK=n 7 | # Enable debug build mode (slower but enables several runtime checks) 8 | CONFIG_DEBUG=n 9 | # Enable additional compiler optimizations (may reduce compatibility) 10 | CONFIG_OPTIMIZE=n 11 | # Allow runtimes to access Mellanox ConnectX-5 NICs directly (kernel bypass) 12 | CONFIG_DIRECTPATH=n 13 | # Build with clang instead of gcc 14 | CONFIG_CLANG=n 15 | # Build with split TX pool 16 | CONFIG_SPLIT_TX=n -------------------------------------------------------------------------------- /build/patches/dpdk/0001-config-extend-max-memseg-lists.patch: -------------------------------------------------------------------------------- 1 | From 9859b2fe35c2f039659075bcc11d03f691107588 Mon Sep 17 00:00:00 2001 2 | From: Josh Fried 3 | Date: Mon, 30 Dec 2024 17:27:30 -0600 4 | Subject: 
[PATCH 1/3] config: extend max memseg lists 5 | 6 | --- 7 | config/rte_config.h | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/config/rte_config.h b/config/rte_config.h 11 | index da265d7dd2..88ad16cc37 100644 12 | --- a/config/rte_config.h 13 | +++ b/config/rte_config.h 14 | @@ -30,7 +30,7 @@ 15 | /* EAL defines */ 16 | #define RTE_CACHE_GUARD_LINES 1 17 | #define RTE_MAX_HEAPS 32 18 | -#define RTE_MAX_MEMSEG_LISTS 128 19 | +#define RTE_MAX_MEMSEG_LISTS 1024 20 | #define RTE_MAX_MEMSEG_PER_LIST 8192 21 | #define RTE_MAX_MEM_MB_PER_LIST 32768 22 | #define RTE_MAX_MEMSEG_PER_TYPE 32768 23 | -- 24 | 2.43.0 25 | 26 | -------------------------------------------------------------------------------- /build/patches/dpdk/0002-i40e-disable-itr.patch: -------------------------------------------------------------------------------- 1 | From df9791417a07e6eb333be938f89bb7f9a20e865b Mon Sep 17 00:00:00 2001 2 | From: Josh Fried 3 | Date: Mon, 30 Dec 2024 17:30:52 -0600 4 | Subject: [PATCH 2/3] i40e: disable itr 5 | 6 | --- 7 | drivers/net/i40e/i40e_ethdev.h | 4 ++-- 8 | 1 file changed, 2 insertions(+), 2 deletions(-) 9 | 10 | diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h 11 | index 1bbe7ad376..3dac4b4be9 100644 12 | --- a/drivers/net/i40e/i40e_ethdev.h 13 | +++ b/drivers/net/i40e/i40e_ethdev.h 14 | @@ -193,9 +193,9 @@ enum i40e_flxpld_layer_idx { 15 | /* Default queue interrupt throttling time in microseconds */ 16 | #define I40E_ITR_INDEX_DEFAULT 0 17 | #define I40E_ITR_INDEX_NONE 3 18 | -#define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */ 19 | +#define I40E_QUEUE_ITR_INTERVAL_DEFAULT 0 /* 32 us */ 20 | #define I40E_QUEUE_ITR_INTERVAL_MAX 8160 /* 8160 us */ 21 | -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */ 22 | +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 0 /* 32 us */ 23 | /* Special FW support this floating VEB feature */ 24 | #define FLOATING_VEB_SUPPORTED_FW_MAJ 5 25 | #define 
FLOATING_VEB_SUPPORTED_FW_MIN 0 26 | -- 27 | 2.43.0 28 | 29 | -------------------------------------------------------------------------------- /build/patches/rdma-core/0004-expose-object-id.patch: -------------------------------------------------------------------------------- 1 | From f793f54d9865e63c04885f9ef816239ac58ee3ba Mon Sep 17 00:00:00 2001 2 | From: Josh Fried 3 | Date: Thu, 27 Apr 2023 11:52:00 -0400 4 | Subject: [PATCH 4/4] expose object id 5 | 6 | --- 7 | providers/mlx5/libmlx5.map | 1 + 8 | providers/mlx5/mlx5.c | 5 +++++ 9 | providers/mlx5/mlx5dv.h | 1 + 10 | 3 files changed, 7 insertions(+) 11 | 12 | diff --git a/providers/mlx5/libmlx5.map b/providers/mlx5/libmlx5.map 13 | index 375d2b38..a4f5fc07 100644 14 | --- a/providers/mlx5/libmlx5.map 15 | +++ b/providers/mlx5/libmlx5.map 16 | @@ -239,4 +239,5 @@ MLX5_1.24 { 17 | mlx5_vfio_get_clock; 18 | mlx5_access_reg; 19 | mlx5dv_dr_table_get_id; 20 | + mlx5_devx_get_obj_id; 21 | } MLX5_1.23; 22 | diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c 23 | index 827712e0..242e553c 100644 24 | --- a/providers/mlx5/mlx5.c 25 | +++ b/providers/mlx5/mlx5.c 26 | @@ -247,6 +247,11 @@ int mlx5_cmd_status_to_err(uint8_t status) 27 | } 28 | } 29 | 30 | +uint32_t mlx5_devx_get_obj_id(struct mlx5dv_devx_obj *obj) 31 | +{ 32 | + return obj->object_id; 33 | +} 34 | + 35 | int mlx5_get_cmd_status_err(int err, void *out) 36 | { 37 | if (err == EREMOTEIO) 38 | diff --git a/providers/mlx5/mlx5dv.h b/providers/mlx5/mlx5dv.h 39 | index 7371b1a2..111493e6 100644 40 | --- a/providers/mlx5/mlx5dv.h 41 | +++ b/providers/mlx5/mlx5dv.h 42 | @@ -2219,6 +2219,7 @@ struct mlx5dv_devx_msi_vector { 43 | }; 44 | 45 | extern uint32_t mlx5dv_dr_table_get_id(struct mlx5dv_dr_table *tbl); 46 | +extern uint32_t mlx5_devx_get_obj_id(struct mlx5dv_devx_obj *obj); 47 | 48 | struct mlx5dv_devx_msi_vector * 49 | mlx5dv_devx_alloc_msi_vector(struct ibv_context *ibctx); 50 | -- 51 | 2.34.1 52 | 53 | 
-------------------------------------------------------------------------------- /build/patches/rdma-core/0005-increase-max-number-of-qps-cqs.patch: -------------------------------------------------------------------------------- 1 | From 150491685a48592a331bb919da78264221924290 Mon Sep 17 00:00:00 2001 2 | From: Josh Fried 3 | Date: Wed, 9 Aug 2023 21:45:10 +0000 4 | Subject: [PATCH 5/5] increase max number of qps/cqs 5 | 6 | --- 7 | providers/mlx5/mlx5_vfio.c | 2 ++ 8 | 1 file changed, 2 insertions(+) 9 | 10 | diff --git a/providers/mlx5/mlx5_vfio.c b/providers/mlx5/mlx5_vfio.c 11 | index 614bd604..b0a94965 100644 12 | --- a/providers/mlx5/mlx5_vfio.c 13 | +++ b/providers/mlx5/mlx5_vfio.c 14 | @@ -1939,6 +1939,8 @@ static int handle_hca_cap(struct mlx5_vfio_context *ctx, void *set_ctx, int set_ 15 | DEVX_SET(cmd_hca_cap, set_hca_cap, disable_link_up_by_init_hca, 1); 16 | 17 | DEVX_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, sys_page_shift - 12); 18 | + DEVX_SET(cmd_hca_cap, set_hca_cap, log_max_qp, MLX5_VFIO_CAP_GEN_MAX(ctx, log_max_qp)); 19 | + DEVX_SET(cmd_hca_cap, set_hca_cap, log_max_cq, MLX5_VFIO_CAP_GEN_MAX(ctx, log_max_cq)); 20 | 21 | if (MLX5_VFIO_CAP_GEN_MAX(ctx, mkey_by_name)) 22 | DEVX_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); 23 | -- 24 | 2.39.2 25 | 26 | -------------------------------------------------------------------------------- /client.config: -------------------------------------------------------------------------------- 1 | # an example runtime config file 2 | host_addr 192.168.1.7 3 | host_netmask 255.255.255.0 4 | host_gateway 192.168.1.1 5 | runtime_kthreads 6 6 | runtime_spinning_kthreads 6 7 | runtime_guaranteed_kthreads 6 8 | runtime_priority lc 9 | -------------------------------------------------------------------------------- /inc/asm/atomic.h: -------------------------------------------------------------------------------- 1 | /* 2 | * atomic.h - utilities for atomic memory ops 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 
/**
 * store_release - store a native value with release fence semantics
 * @p: the pointer to store
 * @v: the value to store
 *
 * The compiler barrier is placed before the store, so the compiler cannot
 * move earlier memory accesses past it. No fence instruction is emitted.
 *
 * @p and @v are parenthesized in the expansion so that compound expressions
 * (e.g. store_release(base + 1, a ? b : c)) are evaluated as written.
 */
#define store_release(p, v)			\
do {						\
	BUILD_ASSERT(type_is_native(*(p)));	\
	barrier();				\
	ACCESS_ONCE(*(p)) = (v);		\
} while (0)

/**
 * load_acquire - load a native value with acquire fence semantics
 * @p: the pointer to load
 *
 * The compiler barrier is placed after the load, so the compiler cannot
 * move later memory accesses ahead of it. No fence instruction is emitted.
 *
 * Returns the loaded value.
 */
#define load_acquire(p)				\
({						\
	BUILD_ASSERT(type_is_native(*(p)));	\
	typeof(*(p)) __p = ACCESS_ONCE(*(p));	\
	barrier();				\
	__p;					\
})

/**
 * load_consume - load a native value with consume fence semantics
 * @p: the pointer to load
 *
 * Expands to the same sequence as load_acquire().
 *
 * Returns the loaded value.
 */
#define load_consume(p)				\
({						\
	BUILD_ASSERT(type_is_native(*(p)));	\
	typeof(*(p)) __p = ACCESS_ONCE(*(p));	\
	barrier();				\
	__p;					\
})
static inline uint16_t chksum_internet(const void *buf, int len)
{
	uint64_t sum;

	/*
	 * Operand map: %0 = sum (the accumulator), %1 = buf (advanced as
	 * bytes are consumed), %2 = len. rdx/edx is scratch: first the number
	 * of 8-byte chunks, later the zero-extended tail loads.
	 *
	 * NOTE(review): len is only inspected with shrl/test, so a negative
	 * length is not rejected here -- callers presumably never pass one.
	 */
	asm volatile("xorq %0, %0\n"

		     /*
		      * process 8 byte chunks
		      *
		      * The carry flag is clear on loop entry and is chained
		      * from one adcq to the next (leaq and decl do not modify
		      * CF); the adcq $0 after the loop adds the final carry.
		      */
		     "movl %2, %%edx\n"
		     "shrl $3, %%edx\n"
		     "cmp $0, %%edx\n"
		     "jz 2f\n"
		     "1: adcq (%1), %0\n"
		     "leaq 8(%1), %1\n"
		     "decl %%edx\n"
		     "jne 1b\n"
		     "adcq $0, %0\n"

		     /* process 4 byte (if left) */
		     "2: test $4, %2\n"
		     "je 3f\n"
		     "movl (%1), %%edx\n"
		     "addq %%rdx, %0\n"
		     "adcq $0, %0\n"
		     "leaq 4(%1), %1\n"

		     /* process 2 byte (if left) */
		     "3: test $2, %2\n"
		     "je 4f\n"
		     "movzwq (%1), %%rdx\n"
		     "addq %%rdx, %0\n"
		     "adcq $0, %0\n"
		     "leaq 2(%1), %1\n"

		     /* process 1 byte (if left) */
		     "4: test $1, %2\n"
		     "je 5f\n"
		     "movzbq (%1), %%rdx\n"
		     "addq %%rdx, %0\n"
		     "adcq $0, %0\n"

		     /*
		      * fold into 16-bit answer
		      *
		      * %1 is reused as scratch from here on (the buffer
		      * pointer is no longer needed): add the two 32-bit
		      * halves, then the two 16-bit halves, re-adding the
		      * carry each time, and finally invert the result.
		      */
		     "5: movq %0, %1\n"
		     "shrq $32, %0\n"
		     "addl %k1, %k0\n"
		     "adcl $0, %k0\n"
		     "movq %0, %1\n"
		     "shrl $16, %k0\n"
		     "addw %w1, %w0\n"
		     "adcw $0, %w0\n"
		     "not %0\n"

		     /*
		      * sum is early-clobber ("=&r") because it is written
		      * before the inputs are consumed; "1"(buf) ties the input
		      * pointer to output operand 1.
		      */
		     : "=&r"(sum), "=r"(buf)
		     : "r"(len), "1"(buf) : "%rdx", "cc", "memory");

	/* only the low 16 bits hold the checksum */
	return (uint16_t)sum;
}
/**
 * rdtsc - reads the CPU's time stamp counter
 *
 * Uses the compiler builtin when the compiler reports it and falls back to
 * the raw instruction in every other configuration, so the function always
 * returns a value (previously a GCC >= 10 build without the builtin fell off
 * the end of the function).
 *
 * Returns the 64-bit counter value.
 */
static inline uint64_t rdtsc(void)
{
#undef __RDTSC_HAVE_BUILTIN
#if __GNUC_PREREQ(10, 0)
# if __has_builtin(__builtin_ia32_rdtsc)
#  define __RDTSC_HAVE_BUILTIN 1
# endif
#endif

#ifdef __RDTSC_HAVE_BUILTIN
	return __builtin_ia32_rdtsc();
#else
	uint64_t a, d;

	/* rdtsc returns the low half in eax and the high half in edx */
	asm volatile("rdtsc" : "=a" (a), "=d" (d));
	return a | (d << 32);
#endif
#undef __RDTSC_HAVE_BUILTIN
}
/*
 * barrier - a compiler-only barrier
 *
 * The asm body is empty, so no instruction is emitted; the "memory" clobber
 * tells the compiler that memory may have changed, which keeps it from
 * caching values in registers or moving memory accesses across this point.
 */
#define barrier() asm volatile("" ::: "memory")

/*
 * ACCESS_ONCE - accesses @x through a volatile-qualified lvalue
 *
 * @x must be an lvalue (its address is taken). Can be used on either side of
 * an assignment.
 */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

/*
 * type_is_native - checks that @t has the size of char, short, int, or long
 *
 * Only the size is compared, so @t may be a type or an expression.
 */
#define type_is_native(t) \
	(sizeof(t) == sizeof(char) || \
	 sizeof(t) == sizeof(short) || \
	 sizeof(t) == sizeof(int) || \
	 sizeof(t) == sizeof(long))
/*
 * cpu_info - topology information for one CPU; cpu_info_tbl holds NCPU of
 * these.
 *
 * NOTE(review): the mask names match the sysfs topology files
 * (thread_siblings / core_siblings); presumably they are filled from there --
 * confirm against the code that populates cpu_info_tbl.
 */
struct cpu_info {
	DEFINE_BITMAP(thread_siblings_mask, NCPU);	/* one bit per possible CPU */
	DEFINE_BITMAP(core_siblings_mask, NCPU);	/* one bit per possible CPU */
	int package;
};
/*
 * describes a generation number
 *
 * The shared word *gen is 0 while no generation is active and non-zero while
 * one is ongoing.
 */
struct gen_num {
	uint32_t		prev_gen;	/* the last generation value recorded locally */
	volatile uint32_t	*gen;		/* the shared generation word */
};

/*
 * gen_active - used by a writer to indicate that a generation is ongoing
 *
 * Does nothing if a generation is already active. Otherwise publishes the
 * successor of the previous generation. Zero is reserved for "between
 * generations", so it is skipped when the 32-bit counter wraps around;
 * without that the wrapped generation would never be marked active.
 */
static inline void gen_active(struct gen_num *g)
{
	uint32_t next;

	if (*g->gen != 0)
		return;

	next = g->prev_gen + 1;
	if (next == 0)
		next = 1;
	*g->gen = next;
}

/*
 * gen_inactive - used by a writer to indicate that we are between generations
 *
 * Remembers the generation that just ended in prev_gen and resets the shared
 * word to zero. Does nothing if no generation is active.
 */
static inline void gen_inactive(struct gen_num *g)
{
	if (*g->gen != 0) {
		g->prev_gen = *g->gen;
		*g->gen = 0;
	}
}
43 | */ 44 | static inline bool gen_in_same_gen(struct gen_num *g) 45 | { 46 | uint32_t current_gen = *g->gen; /* read the shared word once */ 47 | bool unchanged; 48 | 49 | unchanged = (current_gen != 0) && (current_gen == g->prev_gen); 50 | g->prev_gen = current_gen; /* remember what was seen for the next call */ 51 | 52 | return unchanged; 53 | } 54 | 55 | /* 56 | * gen_init - initialize a shared generation number 57 | * @g is bound to the shared word @gen and starts with prev_gen = 0; *gen itself is not written. */ 58 | static inline void gen_init(struct gen_num *g, uint32_t *gen) 59 | { 60 | g->prev_gen = 0; 61 | g->gen = gen; 62 | } 63 | -------------------------------------------------------------------------------- /inc/base/init.h: -------------------------------------------------------------------------------- 1 | /* 2 | * init.h - support for initialization 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | extern int base_init(void); 11 | extern int base_init_thread(void); 12 | extern void init_shutdown(int status) __noreturn; 13 | 14 | extern bool base_init_done; 15 | DECLARE_PERTHREAD(bool, thread_init_done); 16 | -------------------------------------------------------------------------------- /inc/base/kref.h: -------------------------------------------------------------------------------- 1 | /* 2 | * kref.h - generic support for reference counts 3 | * 4 | * This implementation is inspired by the following paper: 5 | * Kroah-Hartman, Greg, kobjects and krefs.
Linux Symposium 2004 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | /* a reference count stored in an atomic_t */ 13 | struct kref { 14 | atomic_t cnt; 15 | }; 16 | 17 | /** 18 | * kref_init - initializes the reference count to one 19 | * @ref: the kref 20 | */ 21 | static inline void 22 | kref_init(struct kref *ref) 23 | { 24 | atomic_write(&ref->cnt, 1); 25 | } 26 | 27 | /** 28 | * kref_initn - initializes the reference count to @n 29 | * @ref: the kref 30 | * @n: the initial reference count 31 | */ 32 | static inline void 33 | kref_initn(struct kref *ref, int n) 34 | { 35 | atomic_write(&ref->cnt, n); 36 | } 37 | 38 | /** 39 | * kref_get - atomically increments the reference count 40 | * @ref: the kref (its count must already be greater than zero) 41 | */ 42 | static inline void 43 | kref_get(struct kref *ref) 44 | { 45 | assert(atomic_read(&ref->cnt) > 0); /* the caller must already hold a reference */ 46 | atomic_inc(&ref->cnt); 47 | } 48 | 49 | /** 50 | * kref_put - atomically decrements the reference count, releasing the object 51 | * when it reaches zero 52 | * @ref: the kref 53 | * @release: a pointer to the release function (must not be NULL) 54 | */ 55 | static inline void 56 | kref_put(struct kref *ref, void (*release)(struct kref *ref)) 57 | { 58 | assert(release); 59 | if (atomic_dec_and_test(&ref->cnt)) 60 | release(ref); 61 | } 62 | 63 | /** 64 | * kref_released - has this kref been released? 65 | * @ref: the kref 66 | * 67 | * WARNING: this is unsafe without additional synchronization. For example, use 68 | * this function while holding a lock that prevents the release() function from 69 | * removing the object from the data structure you are accessing. 70 | * 71 | * Returns true if the reference count has dropped to zero.
72 | */ 73 | static inline bool 74 | kref_released(struct kref *ref) 75 | { 76 | return atomic_read(&ref->cnt) == 0; 77 | } 78 | -------------------------------------------------------------------------------- /inc/base/limits.h: -------------------------------------------------------------------------------- 1 | /* 2 | * limits.h - maximum limits for different resources 3 | */ 4 | 5 | #pragma once 6 | 7 | #define NCPU 256 /* max number of CPUs */ 8 | #define NTHREAD 512 /* max number of threads */ 9 | #define NNUMA 4 /* max number of NUMA zones */ 10 | #define NSTAT 1024 /* max number of stat counters */ 11 | -------------------------------------------------------------------------------- /inc/base/lock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * lock.h - locking primitives 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | /* static initializer for an unlocked spin lock, plus define/declare helpers */ 10 | #define SPINLOCK_INITIALIZER {.locked = 0} 11 | #define DEFINE_SPINLOCK(name) spinlock_t name = SPINLOCK_INITIALIZER 12 | #define DECLARE_SPINLOCK(name) extern spinlock_t name 13 | 14 | /** 15 | * spin_lock_init - prepares a spin lock for use (sets it to the unlocked state) 16 | * @l: the spin lock 17 | */ 18 | static inline void spin_lock_init(spinlock_t *l) 19 | { 20 | l->locked = 0; 21 | } 22 | 23 | /** 24 | * spin_lock_held - determines if the lock is held 25 | * @l: the spin lock 26 | * 27 | * Returns true if the lock is held.
28 | */ 29 | static inline bool spin_lock_held(const spinlock_t *l) 30 | { 31 | return l->locked != 0; 32 | } 33 | 34 | /** 35 | * assert_spin_lock_held - asserts that the lock is currently held 36 | * @l: the spin lock 37 | */ 38 | static inline void assert_spin_lock_held(spinlock_t *l) 39 | { 40 | assert(spin_lock_held(l)); 41 | } 42 | 43 | /** 44 | * spin_lock - takes a spin lock, spinning until it is acquired 45 | * @l: the spin lock 46 | */ 47 | static inline void spin_lock(spinlock_t *l) 48 | { 49 | while (__sync_lock_test_and_set(&l->locked, 1)) { 50 | while (l->locked) /* wait on plain reads, then retry the atomic swap */ 51 | cpu_relax(); 52 | } 53 | } 54 | 55 | /** 56 | * spin_try_lock - takes a spin lock, but only if it is available 57 | * @l: the spin lock 58 | * 59 | * Returns true if the lock was acquired, otherwise false. 60 | */ 61 | static inline bool spin_try_lock(spinlock_t *l) 62 | { 63 | if (!__sync_lock_test_and_set(&l->locked, 1)) 64 | return true; 65 | return false; 66 | } 67 | 68 | /** 69 | * spin_unlock - releases a spin lock (asserts that it is currently held) 70 | * @l: the spin lock 71 | */ 72 | static inline void spin_unlock(spinlock_t *l) 73 | { 74 | assert_spin_lock_held(l); 75 | __sync_lock_release(&l->locked); 76 | } 77 | -------------------------------------------------------------------------------- /inc/base/mem.h: -------------------------------------------------------------------------------- 1 | /* 2 | * mem.h - memory management 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | /* log2 of each supported page size */ 9 | enum { 10 | PGSHIFT_4KB = 12, 11 | PGSHIFT_2MB = 21, 12 | PGSHIFT_1GB = 30, 13 | }; 14 | 15 | enum { 16 | PGSIZE_4KB = (1 << PGSHIFT_4KB), /* 4096 bytes */ 17 | PGSIZE_2MB = (1 << PGSHIFT_2MB), /* 2097152 bytes */ 18 | PGSIZE_1GB = (1 << PGSHIFT_1GB), /* 1073741824 bytes */ 19 | }; 20 | 21 | extern bool cfg_transparent_hugepages_enabled; 22 | /* masks selecting the offset bits within a page */ 23 | #define PGMASK_4KB (PGSIZE_4KB - 1) 24 | #define PGMASK_2MB (PGSIZE_2MB - 1) 25 | #define PGMASK_1GB (PGSIZE_1GB - 1) 26 | 27 | /* page numbers */ 28 | #define PGN_4KB(la) (((uintptr_t)(la)) >> PGSHIFT_4KB) 29 | #define PGN_2MB(la) 
(((uintptr_t)(la)) >> PGSHIFT_2MB) 30 | #define PGN_1GB(la) (((uintptr_t)(la)) >> PGSHIFT_1GB) 31 | /* byte offsets within a page */ 32 | #define PGOFF_4KB(la) (((uintptr_t)(la)) & PGMASK_4KB) 33 | #define PGOFF_2MB(la) (((uintptr_t)(la)) & PGMASK_2MB) 34 | #define PGOFF_1GB(la) (((uintptr_t)(la)) & PGMASK_1GB) 35 | /* addresses rounded down to the start of their page */ 36 | #define PGADDR_4KB(la) (((uintptr_t)(la)) & ~((uintptr_t)PGMASK_4KB)) 37 | #define PGADDR_2MB(la) (((uintptr_t)(la)) & ~((uintptr_t)PGMASK_2MB)) 38 | #define PGADDR_1GB(la) (((uintptr_t)(la)) & ~((uintptr_t)PGMASK_1GB)) 39 | 40 | typedef unsigned long physaddr_t; /* physical addresses */ 41 | typedef unsigned long virtaddr_t; /* virtual addresses */ 42 | 43 | #ifndef MAP_FAILED 44 | #define MAP_FAILED ((void *)-1) 45 | #endif 46 | 47 | typedef unsigned int mem_key_t; 48 | 49 | extern void *mem_map_anom(void *base, size_t len, size_t pgsize, int node); 50 | extern void *mem_map_file(void *base, size_t len, int fd, off_t offset); 51 | extern void *mem_map_shm(mem_key_t key, void *base, size_t len, 52 | size_t pgsize, bool exclusive); 53 | extern void *mem_map_shm_rdonly(mem_key_t key, void *base, size_t len, 54 | size_t pgsize); 55 | extern int mem_unmap_shm(void *base); 56 | extern int mem_lookup_page_phys_addrs(void *addr, size_t len, size_t pgsize, 57 | physaddr_t *maddrs); 58 | extern void touch_mapping(void *base, size_t len, size_t pgsize); 59 | /* mem_lookup_page_phys_addr - single-page form of mem_lookup_page_phys_addrs(); passes @pgsize as the length and returns its result */ 60 | static inline int 61 | mem_lookup_page_phys_addr(void *addr, size_t pgsize, physaddr_t *paddr) 62 | { 63 | return mem_lookup_page_phys_addrs(addr, pgsize, pgsize, paddr); 64 | } 65 | -------------------------------------------------------------------------------- /inc/base/mempool.h: -------------------------------------------------------------------------------- 1 | /* 2 | * mempool.h - a simple, preallocated pool of memory 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | struct mempool { 11 | void **free_items; /* item pointers; mempool_alloc() hands out free_items[allocated] */ 12 | size_t allocated; /* count of items handed out; index of the next free slot */ 13 | size_t capacity; /* mempool_alloc() returns NULL once allocated reaches this */ 14 | void *buf; /* start of the range tested by mempool_member() */ 15 | size_t len; /* length in bytes of that range */ 16 | size_t pgsize; 17 
| size_t item_len; 18 | }; 19 | 20 | #ifdef DEBUG 21 | extern void __mempool_alloc_debug_check(struct mempool *m, void *item); 22 | extern void __mempool_free_debug_check(struct mempool *m, void *item); 23 | #else /* DEBUG */ 24 | static inline void __mempool_alloc_debug_check(struct mempool *m, void *item) {} 25 | static inline void __mempool_free_debug_check(struct mempool *m, void *item) {} 26 | #endif /* DEBUG */ 27 | /* mempool_member - returns true if @addr lies inside [buf, buf + len); compared as char * because arithmetic on void * is a GNU extension */ 28 | static inline bool mempool_member(struct mempool *m, void *addr) 29 | { 30 | return (char *)addr >= (char *)m->buf && (char *)addr < (char *)m->buf + m->len; 31 | } 32 | 33 | /** 34 | * mempool_alloc - allocates an item from the pool 35 | * @m: the memory pool to allocate from 36 | * 37 | * Returns an item, or NULL if the pool is empty. 38 | */ 39 | static inline void *mempool_alloc(struct mempool *m) 40 | { 41 | void *item; 42 | if (unlikely(m->allocated >= m->capacity)) 43 | return NULL; 44 | item = m->free_items[m->allocated++]; 45 | __mempool_alloc_debug_check(m, item); 46 | return item; 47 | } 48 | 49 | /** 50 | * mempool_free - returns an item to the pool 51 | * @m: the memory pool the item was allocated from 52 | * @item: the item to return 53 | * The pool must have at least one outstanding item; this is asserted before the slot is written. */ 54 | static inline void mempool_free(struct mempool *m, void *item) 55 | { 56 | __mempool_free_debug_check(m, item); 57 | assert(m->allocated > 0 && m->allocated <= m->capacity); /* catch underflow before it indexes out of bounds */ 58 | m->free_items[--m->allocated] = item; 59 | } 60 | 61 | extern int mempool_create(struct mempool *m, void *buf, size_t len, 62 | size_t pgsize, size_t item_len); 63 | extern void mempool_destroy(struct mempool *m); 64 | 65 | extern struct tcache *mempool_create_tcache(struct mempool *m, const char *name, 66 | unsigned int mag_size); 67 | -------------------------------------------------------------------------------- /inc/base/pci.h: -------------------------------------------------------------------------------- 1 | /* 2 | * pci.h - PCI bus support 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | struct pci_bar { 11 
| uint64_t start; /* the start address, or zero if no resource */ 12 | uint64_t len; /* the length of the resource */ 13 | uint64_t flags; /* Linux resource flags */ 14 | }; 15 | 16 | /* NOTE: these are the same as the Linux PCI sysfs resource flags */ 17 | #define PCI_BAR_IO 0x00000100 18 | #define PCI_BAR_MEM 0x00000200 19 | #define PCI_BAR_PREFETCH 0x00002000 /* typically WC memory */ 20 | #define PCI_BAR_READONLY 0x00004000 /* typically option ROMs */ 21 | #define PCI_MAX_BARS 7 /* number of entries in pci_dev.bars[] */ 22 | /* a PCI address split into domain, bus, slot and function */ 23 | struct pci_addr { 24 | uint16_t domain; 25 | uint8_t bus; 26 | uint8_t slot; 27 | uint8_t func; 28 | } __packed; 29 | 30 | extern int pci_str_to_addr(const char *str, struct pci_addr *addr); 31 | 32 | struct pci_dev { 33 | struct pci_addr addr; 34 | struct kref ref; /* refcount managed by pci_dev_get() / pci_dev_put() */ 35 | 36 | uint16_t vendor_id; 37 | uint16_t device_id; 38 | uint16_t subsystem_vendor_id; 39 | uint16_t subsystem_device_id; 40 | 41 | struct pci_bar bars[PCI_MAX_BARS]; 42 | int numa_node; 43 | int max_vfs; 44 | }; 45 | 46 | extern struct pci_dev *pci_alloc_dev(const struct pci_addr *addr); 47 | extern void pci_release_dev(struct kref *ref); /* release callback that pci_dev_put() hands to kref_put() */ 48 | extern struct pci_bar *pci_find_mem_bar(struct pci_dev *dev, int count); 49 | extern void *pci_map_mem_bar(struct pci_dev *dev, struct pci_bar *bar, bool wc); 50 | extern void pci_unmap_mem_bar(struct pci_bar *bar, void *vaddr); 51 | 52 | /** 53 | * pci_dev_get - increments the PCI device refcount 54 | * @dev: the PCI device 55 | * 56 | * Returns the device.
57 | */ 58 | static inline struct pci_dev *pci_dev_get(struct pci_dev *dev) 59 | { 60 | kref_get(&dev->ref); 61 | return dev; 62 | } 63 | 64 | /** 65 | * pci_dev_put - decrements the PCI device refcount; pci_release_dev() is the release callback 66 | * @dev: the PCI device 67 | */ 68 | static inline void pci_dev_put(struct pci_dev *dev) 69 | { 70 | kref_put(&dev->ref, pci_release_dev); 71 | } 72 | -------------------------------------------------------------------------------- /inc/base/signal.h: -------------------------------------------------------------------------------- 1 | /* 2 | * signal.h - support for setting up signal handlers without using glibc 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | extern int base_sigaction(int sig, const struct sigaction *act, 10 | struct sigaction *oact); 11 | extern int base_sigaction_full(int sig, const struct sigaction *act, 12 | struct sigaction *oact); -------------------------------------------------------------------------------- /inc/base/slab.h: -------------------------------------------------------------------------------- 1 | /* 2 | * slab.h - a SLAB allocator 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | /* forward declarations */ 13 | struct slab_hdr; 14 | struct slab_node; 15 | struct tcache; 16 | 17 | 18 | /* 19 | * slab support 20 | */ 21 | 22 | #define SLAB_CHUNK_SIZE 8 23 | #define SLAB_MIN_SIZE 16 /* also the alignment promised by __slab_malloc below */ 24 | 25 | /* function attributes for methods that allocate slab items */ 26 | #define __slab_malloc __malloc __assume_aligned(SLAB_MIN_SIZE) 27 | 28 | /* Slab nodes are per-numa node slab internal state.
*/ 29 | struct slab_node { 30 | size_t size; 31 | int numa_node; 32 | int offset; 33 | int flags; /* NOTE(review): presumably SLAB_FLAG_* bits -- confirm */ 34 | int nr_elems; 35 | spinlock_t page_lock; 36 | 37 | /* slab pages */ 38 | off_t pg_off; 39 | struct page *cur_pg; 40 | struct list_head full_list; 41 | struct list_head partial_list; 42 | int nr_pages; 43 | }; 44 | /* a named slab with one slab_node pointer per NUMA zone */ 45 | struct slab { 46 | const char *name; 47 | size_t size; 48 | struct list_node link; 49 | struct slab_node *nodes[NNUMA]; 50 | } __aligned(CACHE_LINE_SIZE); 51 | 52 | /* force the slab to be backed with large pages */ 53 | #define SLAB_FLAG_LGPAGE BIT(0) 54 | /* false sharing is okay (less internal fragmentation) */ 55 | #define SLAB_FLAG_FALSE_OKAY BIT(1) 56 | /* managing 4kb pages (internal use only) */ 57 | #define SLAB_FLAG_PAGES BIT(2) 58 | 59 | extern int slab_create(struct slab *s, const char *name, size_t size, int flags); 60 | extern void slab_destroy(struct slab *s); 61 | extern int slab_reclaim(struct slab *s); 62 | extern void *slab_alloc_on_node(struct slab *s, int numa_node) __slab_malloc; 63 | extern void slab_free(struct slab *s, void *item); 64 | extern void slab_print_usage(void); 65 | 66 | /** 67 | * slab_alloc - allocates an item on the local NUMA node 68 | * @s: the slab to allocate from 69 | * 70 | * Returns an item or NULL if out of memory.
71 | */ 72 | static __always_inline void *slab_alloc(struct slab *s) 73 | { 74 | return slab_alloc_on_node(s, this_numa_node()); 75 | } 76 | 77 | struct tcache *slab_create_tcache(struct slab *s, unsigned int mag_size); 78 | -------------------------------------------------------------------------------- /inc/base/stat.h: -------------------------------------------------------------------------------- 1 | /* 2 | * stat.h - statistics counter support 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | /* 14 | * Generic stat counter API 15 | */ 16 | 17 | struct stat_entry; 18 | typedef uint64_t (*stat_collect_fn_t)(struct stat_entry *e, unsigned long data); 19 | /* one stat: a name plus a collector callback and its opaque data word */ 20 | struct stat_entry { 21 | const char *name; 22 | stat_collect_fn_t handler; 23 | unsigned long data; /* NOTE(review): presumably passed to handler as its data argument -- confirm */ 24 | struct list_node link; 25 | }; 26 | 27 | extern int stat_register(struct stat_entry *entry); 28 | extern void stat_unregister(struct stat_entry *entry); 29 | extern uint64_t stat_collect(struct stat_entry *entry); 30 | /* a collected (name, value) pair, as filled in by stat_collect_all() */ 31 | struct stat_result { 32 | const char *name; 33 | uint64_t val; 34 | }; 35 | 36 | extern int stat_collect_all(struct stat_result *results_out, int capacity); 37 | extern void stat_print_all(void); 38 | 39 | 40 | /* 41 | * Some common stat collectors 42 | */ 43 | 44 | extern uint64_t __stat_var_collect(struct stat_entry *e, unsigned long data); 45 | extern uint64_t __stat_perthread_var_collect(struct stat_entry *e, 46 | unsigned long data); 47 | 48 | /** 49 | * stat_register_var - registers a stat backed by a uint64_t 50 | * @entry: the stat entry struct to register 51 | * @name: a human-readable name for the stat 52 | * @val: the uint64_t value that stores the count 53 | * 54 | * Returns 0 if successful, otherwise fail.
55 | */ 56 | static inline int 57 | stat_register_var(struct stat_entry *entry, const char *name, uint64_t *val) 58 | { 59 | entry->name = name; 60 | entry->handler = __stat_var_collect; 61 | entry->data = (unsigned long)val; /* the pointer travels in the opaque data word */ 62 | return stat_register(entry); 63 | } 64 | 65 | /** 66 | * stat_register_perthread_var - registers a stat backed by a perthread uint64_t 67 | * @entry: the stat entry struct to register 68 | * @name: a human-readable name for the stat 69 | * @val: the perthread uint64_t value that stores the count 70 | * 71 | * Returns 0 if successful, otherwise the failure value returned by stat_register(). 72 | */ 73 | static inline int 74 | stat_register_perthread_var(struct stat_entry *entry, const char *name, 75 | uint64_t __perthread *val) 76 | { 77 | entry->name = name; 78 | entry->handler = __stat_perthread_var_collect; 79 | entry->data = (__force unsigned long)val; /* __force: explicit cast out of the __perthread annotation */ 80 | return stat_register(entry); 81 | } 82 | -------------------------------------------------------------------------------- /inc/base/syscall.h: -------------------------------------------------------------------------------- 1 | /* 2 | * syscall.h - support for common syscalls in the base library 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | /* NOTE(review): presumably bound the base library's syscall code region -- confirm */ 10 | extern const char base_syscall_start[]; 11 | extern const char base_syscall_end[]; 12 | 13 | extern void *syscall_mmap(void *addr, size_t length, int prot, int flags, 14 | int fd, off_t offset); 15 | extern long syscall_mbind(void *start, size_t len, int mode, 16 | const unsigned long *nmask, unsigned long maxnode, 17 | unsigned flags); 18 | extern void syscall_rt_sigreturn(void); 19 | extern int syscall_ioctl(int fd, unsigned long int request, void *arg); 20 | extern int syscall_madvise(void *addr, size_t length, int advice); 21 | extern int syscall_mprotect(void *addr, size_t len, int prot); -------------------------------------------------------------------------------- /inc/base/sysfs.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * sysfs.h - utilities for accessing sysfs 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | /* sysfs locations; the %d in the CPU and node paths is filled in with the CPU / node number */ 9 | #define SYSFS_PCI_PATH "/sys/bus/pci/devices" 10 | #define SYSFS_CPU_TOPOLOGY_PATH "/sys/devices/system/cpu/cpu%d/topology" 11 | #define SYSFS_NODE_PATH "/sys/devices/system/node/node%d" 12 | 13 | extern int sysfs_parse_val(const char *path, uint64_t *val_out); 14 | extern int sysfs_parse_bitlist(const char *path, unsigned long *bits, 15 | int nbits); 16 | -------------------------------------------------------------------------------- /inc/base/time.h: -------------------------------------------------------------------------------- 1 | /* 2 | * time.h - timekeeping utilities 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | /* time unit constants, expressed in microseconds */ 10 | #define ONE_SECOND 1000000 11 | #define ONE_MS 1000 12 | #define ONE_US 1 13 | 14 | extern int cycles_per_us; /* divisor used by microtime() to convert TSC cycles to microseconds */ 15 | extern uint64_t start_tsc; /* TSC value that microtime() subtracts from the current reading */ 16 | 17 | /** 18 | * microtime - gets the number of microseconds since the process started 19 | * This routine is very inexpensive, even compared to clock_gettime().
20 | */ 21 | static inline uint64_t microtime(void) 22 | { 23 | return (rdtsc() - start_tsc) / cycles_per_us; /* elapsed cycles converted to microseconds */ 24 | } 25 | 26 | extern void __time_delay_us(uint64_t us); 27 | 28 | /** 29 | * delay_us - pauses the CPU for microseconds 30 | * @us: the number of microseconds 31 | */ 32 | static inline void delay_us(uint64_t us) 33 | { 34 | __time_delay_us(us); 35 | } 36 | 37 | /** 38 | * delay_ms - pauses the CPU for milliseconds 39 | * @ms: the number of milliseconds 40 | */ 41 | static inline void delay_ms(uint64_t ms) 42 | { 43 | /* TODO: yield instead of spin */ 44 | __time_delay_us(ms * ONE_MS); 45 | } 46 | -------------------------------------------------------------------------------- /inc/base/types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * types.h - primitive type definitions 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | typedef unsigned char uint8_t; 11 | typedef unsigned short uint16_t; 12 | typedef unsigned int uint32_t; 13 | 14 | typedef signed char int8_t; 15 | typedef signed short int16_t; 16 | typedef signed int int32_t; 17 | 18 | #ifndef __WORD_SIZE 19 | #error __WORD_SIZE is undefined 20 | #endif 21 | 22 | #if __WORD_SIZE == __64BIT_WORDS 23 | 24 | typedef unsigned long uint64_t; 25 | typedef signed long int64_t; 26 | 27 | #else /* __WORD_SIZE == __64BIT_WORDS */ 28 | 29 | typedef unsigned long long uint64_t; 30 | typedef signed long long int64_t; 31 | 32 | #endif /* __WORD_SIZE == __64BIT_WORDS */ 33 | 34 | typedef unsigned long uintptr_t; 35 | typedef long intptr_t; 36 | typedef long off_t; 37 | typedef unsigned long size_t; 38 | typedef long ssize_t; 39 | /* spin lock word: zero when unlocked, nonzero when held (see lock.h) */ 40 | typedef struct { 41 | volatile int locked; 42 | } spinlock_t; 43 | 44 | typedef struct { 45 | volatile int cnt; 46 | } atomic_t; 47 | 48 | typedef struct { 49 | volatile long cnt; 50 | } atomic64_t; 51 | -------------------------------------------------------------------------------- /inc/iokernel/directpath.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * directpath.h - definitions for directpath structures 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include 10 | 11 | #define DIRECTPATH_STRIDE_RQ_NUM_DESC 128 12 | #define DIRECTPATH_STRIDE_MODE_BUF_SZ 16384 13 | #define DIRECTPATH_STRIDE_SIZE 256 14 | /* strides per buffer: 16384 / 256 = 64 */ 15 | #define DIRECTPATH_NUM_STRIDES \ 16 | (DIRECTPATH_STRIDE_MODE_BUF_SZ / DIRECTPATH_STRIDE_SIZE) 17 | /* log2 of the stride count, which is a power of two here */ 18 | #define DIRECTPATH_STRIDE_SHIFT (__builtin_ctz(DIRECTPATH_NUM_STRIDES)) 19 | /* strides across all descriptors (64 * 128 = 8192) and one quarter of that (2048) */ 20 | #define DIRECTPATH_TOTAL_RX_EL \ 21 | (DIRECTPATH_NUM_STRIDES * DIRECTPATH_STRIDE_RQ_NUM_DESC) 22 | #define DIRECTPATH_STRIDE_REFILL_THRESH_HI \ 23 | (DIRECTPATH_TOTAL_RX_EL * 1 / 4) 24 | /* twice the bytes needed to back every descriptor with one buffer */ 25 | #define DIRECTPATH_STRIDE_RX_BUF_POOL_SZ \ 26 | (2 * DIRECTPATH_STRIDE_RQ_NUM_DESC * DIRECTPATH_STRIDE_MODE_BUF_SZ) 27 | /* compile-time checks: buffers divide evenly into strides, 2MB pages divide evenly into buffers, strides are at least 64 bytes */ 28 | BUILD_ASSERT(DIRECTPATH_STRIDE_MODE_BUF_SZ % DIRECTPATH_STRIDE_SIZE == 0); 29 | BUILD_ASSERT(PGSIZE_2MB % DIRECTPATH_STRIDE_MODE_BUF_SZ == 0); 30 | BUILD_ASSERT(DIRECTPATH_STRIDE_SIZE >= 64); 31 | 32 | struct directpath_ring_q_spec { 33 | shmptr_t buf; 34 | shmptr_t dbrec; 35 | uint64_t nr_entries; 36 | uint32_t stride; 37 | }; 38 | /* one queue: identifiers plus work-queue and completion-queue ring specs for RX and TX */ 39 | struct directpath_queue_spec { 40 | uint32_t sqn; 41 | uint32_t uarn; 42 | uint32_t uar_offset; 43 | struct directpath_ring_q_spec rx_wq; 44 | struct directpath_ring_q_spec rx_cq; 45 | struct directpath_ring_q_spec tx_wq; 46 | struct directpath_ring_q_spec tx_cq; 47 | }; 48 | 49 | struct directpath_spec { 50 | uint32_t mr; 51 | size_t va_base; 52 | size_t memfd_region_size; 53 | 54 | /* bar map */ 55 | off_t offs; 56 | size_t bar_map_size; 57 | 58 | struct directpath_ring_q_spec rmp; 59 | 60 | shmptr_t buf_region; 61 | size_t rx_buf_region_size; 62 | size_t tx_buf_region_size; 63 | 64 | struct directpath_queue_spec qs[]; /* flexible array member; its length is not stored in this struct */ 65 | }; 66 | -------------------------------------------------------------------------------- /inc/net/arp.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * arp.h - Address Resolution Protocol (RFC 826, RFC 903) 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | /* fixed-size part of an ARP packet; the address fields follow it */ 11 | struct arp_hdr { 12 | uint16_t htype; 13 | uint16_t ptype; /* the ETHERTYPE */ 14 | uint8_t hlen; 15 | uint8_t plen; 16 | uint16_t op; 17 | 18 | /* 19 | * Variable length fields continue as follows: 20 | * sender hw addr: hlen bytes 21 | * sender protocol addr: plen bytes 22 | * target hw addr: hlen bytes 23 | * target protocol addr: plen bytes 24 | */ 25 | } __packed; 26 | /* the address fields laid out for Ethernet hardware addresses and 32-bit protocol addresses */ 27 | struct arp_hdr_ethip { 28 | struct eth_addr sender_mac; 29 | uint32_t sender_ip; 30 | struct eth_addr target_mac; 31 | uint32_t target_ip; 32 | } __packed; 33 | 34 | #define ARP_HTYPE_ETHER 1 /* ethernet */ 35 | #define ARP_HTYPE_IEEE802 6 /* token-ring */ 36 | #define ARP_HTYPE_ARCNET 7 /* arcnet */ 37 | #define ARP_HTYPE_FRELAY 16 /* frame relay */ 38 | #define ARP_HTYPE_IEEE1394 24 /* firewire */ 39 | #define ARP_HTYPE_INFINIBAND 32 /* infiniband */ 40 | 41 | enum { 42 | ARP_OP_REQUEST = 1, /* request hw addr given protocol addr */ 43 | ARP_OP_REPLY = 2, /* response hw addr given protocol addr */ 44 | ARP_OP_REVREQUEST = 3, /* request protocol addr given hw addr */ 45 | ARP_OP_REVREPLY = 4, /* response protocol addr given hw addr */ 46 | }; 47 | -------------------------------------------------------------------------------- /inc/net/mbufq.h: -------------------------------------------------------------------------------- 1 | /* 2 | * mbufq.h - singly-linked queue of MBUFs 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | struct mbuf; 10 | /* the queue is empty when head is NULL; tail is only meaningful while head is non-NULL */ 11 | struct mbufq { 12 | struct mbuf *head, *tail; 13 | }; 14 | 15 | /** 16 | * mbufq_push_tail - push an mbuf to the tail of the queue 17 | * @q: the mbuf queue 18 | * @m: the mbuf to push 19 | */ 20 | static inline void mbufq_push_tail(struct mbufq *q, struct mbuf *m) 21 | { 22 | m->next = NULL; 23 | if (!q->head) { 24 | 
q->head = q->tail = m; 25 | return; 26 | } 27 | q->tail->next = m; 28 | q->tail = m; 29 | } 30 | 31 | /** 32 | * mbufq_pop_head - pop an mbuf from the head of the queue 33 | * @q: the mbuf queue 34 | * 35 | * Returns an mbuf or NULL if the queue is empty. 36 | */ 37 | static inline struct mbuf *mbufq_pop_head(struct mbufq *q) 38 | { 39 | struct mbuf *head = q->head; 40 | if (!head) 41 | return NULL; 42 | q->head = head->next; /* tail is left as-is; it is ignored while head is NULL */ 43 | return head; 44 | } 45 | 46 | /** 47 | * mbufq_peak_head - reads the head of the queue without popping 48 | * @q: the mbuf queue 49 | * 50 | * Returns an mbuf or NULL if the queue is empty. 51 | */ 52 | static inline struct mbuf *mbufq_peak_head(struct mbufq *q) 53 | { 54 | return q->head; 55 | } 56 | 57 | /** 58 | * mbufq_merge_to_tail - merges a queue to the end of another queue 59 | * @dst: the destination queue (will contain all the mbufs) 60 | * @src: the source queue (will become empty) 61 | */ 62 | static inline void mbufq_merge_to_tail(struct mbufq *dst, struct mbufq *src) 63 | { 64 | if (!src->head) 65 | return; 66 | if (!dst->head) 67 | dst->head = src->head; 68 | else 69 | dst->tail->next = src->head; 70 | dst->tail = src->tail; 71 | src->head = NULL; /* only head is reset; a NULL head marks src as empty */ 72 | } 73 | 74 | /** 75 | * mbufq_empty - returns true if the queue is empty 76 | */ 77 | static inline bool mbufq_empty(struct mbufq *q) 78 | { 79 | return q->head == NULL; 80 | } 81 | 82 | /** 83 | * mbufq_release - frees all the mbufs in the queue 84 | * @q: the queue to release 85 | */ 86 | static inline void mbufq_release(struct mbufq *q) 87 | { 88 | struct mbuf *m; 89 | while (true) { 90 | m = mbufq_pop_head(q); 91 | if (!m) 92 | break; 93 | mbuf_free(m); 94 | } 95 | } 96 | 97 | /** 98 | * mbufq_init - initializes a queue 99 | * @q: the mbuf queue to initialize 100 | */ 101 | static inline void mbufq_init(struct mbufq *q) 102 | { 103 | q->head = NULL; /* tail is deliberately not set; mbufq_push_tail() sets it on the first push */ 104 | } 105 | -------------------------------------------------------------------------------- /inc/net/ping.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | /* payload carried in each ping: a single timeval */ 7 | struct ping_payload { 8 | struct timeval tx_time; 9 | }; 10 | /* (void) makes this a real prototype; an empty parameter list leaves the arguments unchecked */ 11 | int net_ping_init(void); 12 | void net_send_ping(uint16_t seq_num, uint32_t daddr); 13 | void net_recv_ping(const struct ping_payload *payload, 14 | const struct icmp_pkt *icmp_pkt); 15 | -------------------------------------------------------------------------------- /inc/net/tcp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * tcp.h - Transmission Control Protocol (TCP) definitions 3 | * 4 | * Based on FreeBSD, BSD licensed. 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | typedef uint32_t tcp_seq; 12 | 13 | /* 14 | * TCP header. 15 | * Per RFC 793, September, 1981. 16 | */ 17 | struct tcp_hdr { 18 | uint16_t sport; /* source port */ 19 | uint16_t dport; /* destination port */ 20 | tcp_seq seq; /* sequence number */ 21 | tcp_seq ack; /* acknowledgement number */ 22 | #if __BYTE_ORDER == __LITTLE_ENDIAN 23 | uint8_t x2:4, /* (unused) */ 24 | off:4; /* data offset */ 25 | #endif 26 | #if __BYTE_ORDER == __BIG_ENDIAN 27 | uint8_t off:4, /* data offset */ 28 | x2:4; /* (unused) */ 29 | #endif 30 | uint8_t flags; 31 | #define TCP_FIN 0x01 32 | #define TCP_SYN 0x02 33 | #define TCP_RST 0x04 34 | #define TCP_PUSH 0x08 35 | #define TCP_ACK 0x10 36 | #define TCP_URG 0x20 37 | #define TCP_ECE 0x40 38 | #define TCP_CWR 0x80 39 | #define TCP_FLAGS \ 40 | (TCP_FIN|TCP_SYN|TCP_RST|TCP_PUSH|TCP_ACK|TCP_URG|TCP_ECE|TCP_CWR) 41 | #define PRINT_TCP_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR" 42 | 43 | uint16_t win; /* window */ 44 | uint16_t sum; /* checksum */ 45 | uint16_t urp; /* urgent pointer */ 46 | }; 47 | 48 | /* 49 | * TCP options.
50 | */ 51 | #define TCP_OPT_EOL 0 /* end of options */ 52 | #define TCP_OPT_NOP 1 /* used for padding */ 53 | #define TCP_OPT_MSS 2 /* maximum segment size negotiation */ 54 | #define TCP_OPT_WSCALE 3 /* window scaling factor */ 55 | /* NOTE(review): presumably the total encoded length of each option in bytes -- confirm */ 56 | #define TCP_OLEN_MSS 4 57 | #define TCP_OLEN_WSCALE 3 58 | -------------------------------------------------------------------------------- /inc/net/udp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * udp.h - User Datagram Protocol 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | /* the UDP header: four 16-bit fields */ 9 | struct udp_hdr { 10 | uint16_t src_port; 11 | uint16_t dst_port; 12 | uint16_t len; 13 | uint16_t chksum; 14 | }; 15 | -------------------------------------------------------------------------------- /inc/runtime/gc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | struct kthread; 6 | /* GC support is compiled in only when GC is defined; the define below is commented out */ 7 | // #define GC 1 8 | 9 | #ifdef GC 10 | 11 | /* External API */ 12 | typedef void (*stack_bounds_cb)(uint64_t bottom, uint64_t top); 13 | extern void gc_stop_world(void); 14 | extern void gc_start_world(void); 15 | 16 | /* reports each active stack to discover_cb */ 17 | extern void gc_discover_all_stacks(stack_bounds_cb discover_cb); 18 | 19 | /* Internal API */ 20 | extern volatile bool world_stopped; 21 | extern volatile uint64_t gc_gen; 22 | 23 | static inline bool is_world_stopped(void) 24 | { 25 | return ACCESS_ONCE(world_stopped); 26 | } 27 | 28 | static inline uint64_t get_gc_gen(void) 29 | { 30 | return ACCESS_ONCE(gc_gen); 31 | } 32 | 33 | extern int gc_register_thread(thread_t *th); 34 | extern int gc_remove_thread(thread_t *th); 35 | extern void gc_kthread_report(struct kthread *k); 36 | 37 | #else /* !GC: no-op stubs; note that get_gc_gen() has no stub */ 38 | static inline int gc_register_thread(thread_t *th) 39 | { 40 | return 0; 41 | } 42 | static inline int gc_remove_thread(thread_t *th) 43 | { 44 | return 0; 45 | } 46 | static inline void gc_kthread_report(struct kthread *k) {} 47 | 
static inline bool is_world_stopped(void) 48 | { 49 | return false; 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /inc/runtime/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * net.h - shared network definitions 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | struct netaddr { 10 | uint32_t ip; 11 | uint16_t port; 12 | }; 13 | 14 | extern int str_to_netaddr(const char *str, struct netaddr *addr); 15 | -------------------------------------------------------------------------------- /inc/runtime/poll.h: -------------------------------------------------------------------------------- 1 | /* 2 | * poll.h - support for event polling (similar to select/epoll/poll, etc.) 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | typedef struct poll_waiter { 13 | spinlock_t lock; 14 | struct list_head triggered; 15 | thread_t *waiting_th; 16 | } poll_waiter_t; 17 | 18 | typedef struct poll_trigger { 19 | struct list_node link; 20 | struct poll_waiter *waiter; 21 | bool triggered; 22 | unsigned long data; 23 | } poll_trigger_t; 24 | 25 | 26 | /* 27 | * Waiter API 28 | */ 29 | 30 | extern void poll_init(poll_waiter_t *w); 31 | extern void poll_arm(poll_waiter_t *w, poll_trigger_t *t, unsigned long data); 32 | extern void poll_disarm(poll_trigger_t *t); 33 | extern unsigned long poll_wait(poll_waiter_t *w); 34 | 35 | 36 | /* 37 | * Trigger API 38 | */ 39 | 40 | /** 41 | * poll_trigger_init - initializes a trigger 42 | * @t: the trigger to initialize 43 | */ 44 | static inline void poll_trigger_init(poll_trigger_t *t) 45 | { 46 | t->waiter = NULL; 47 | t->triggered = false; 48 | } 49 | 50 | extern void poll_trigger(poll_waiter_t *w, poll_trigger_t *t); 51 | -------------------------------------------------------------------------------- /inc/runtime/preempt.h: 
/*
 * This flag is set whenever there is _not_ a pending preemption.
 *
 * It is the top bit of the unsigned int preempt counter. The constant is
 * built from an unsigned operand because shifting a signed 1 into the sign
 * bit is undefined behavior in C; the bit pattern (0x80000000) is unchanged.
 */
#define PREEMPT_NOT_PENDING	(1U << 31)
48 | */ 49 | static __always_inline __nofp void preempt_enable(void) 50 | { 51 | #ifndef __GCC_ASM_FLAG_OUTPUTS__ 52 | preempt_enable_nocheck(); 53 | if (unlikely(perthread_read(preempt_cnt) == 0)) 54 | preempt(); 55 | #else 56 | int zero; 57 | barrier(); 58 | asm volatile("subl $1, %%gs:__perthread_preempt_cnt(%%rip)" 59 | : "=@ccz" (zero) :: "memory", "cc"); 60 | if (unlikely(zero)) 61 | preempt(); 62 | #endif 63 | } 64 | 65 | /** 66 | * preempt_needed - returns true if a preemption event is stuck waiting 67 | */ 68 | static inline bool preempt_needed(void) 69 | { 70 | return (perthread_read(preempt_cnt) & PREEMPT_NOT_PENDING) == 0; 71 | } 72 | 73 | /** 74 | * preempt_enabled - returns true if preemption is enabled 75 | */ 76 | static __always_inline __nofp bool preempt_enabled(void) 77 | { 78 | return (perthread_read(preempt_cnt) & ~PREEMPT_NOT_PENDING) == 0; 79 | } 80 | 81 | /** 82 | * assert_preempt_disabled - asserts that preemption is disabled 83 | */ 84 | static inline void assert_preempt_disabled(void) 85 | { 86 | assert(!preempt_enabled()); 87 | } 88 | 89 | /** 90 | * clear_preempt_needed - clear the flag that indicates a preemption request is 91 | * pending 92 | */ 93 | static inline void clear_preempt_needed(void) 94 | { 95 | BUILD_ASSERT(PREEMPT_NOT_PENDING == 0x80000000); 96 | perthread_ori(preempt_cnt, 0x80000000); 97 | } 98 | -------------------------------------------------------------------------------- /inc/runtime/rculist.h: -------------------------------------------------------------------------------- 1 | /* 2 | * rculist.h - support for RCU list data structures 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | struct rcu_hlist_node { 10 | struct rcu_hlist_node __rcu *next; 11 | struct rcu_hlist_node * __rcu *pprev; 12 | }; 13 | 14 | struct rcu_hlist_head { 15 | struct rcu_hlist_node __rcu *head; 16 | }; 17 | 18 | /** 19 | * rcu_hlist_init_head - initializes an RCU hlist 20 | * @h: the list head 21 | */ 22 | static inline void 
rcu_hlist_init_head(struct rcu_hlist_head *h) 23 | { 24 | RCU_INIT_POINTER(h->head, NULL); 25 | } 26 | 27 | /** 28 | * rcu_hlist_add_head - adds a node to the head of an RCU hlist 29 | * @h: the list head 30 | * @n: the node to add 31 | */ 32 | static inline void rcu_hlist_add_head(struct rcu_hlist_head *h, 33 | struct rcu_hlist_node *n) 34 | { 35 | struct rcu_hlist_node *head = h->head; 36 | RCU_INIT_POINTER(n->next, head); 37 | n->pprev = &h->head; 38 | rcu_assign_pointer(h->head, n); 39 | if (head) 40 | head->pprev = &n->next; 41 | } 42 | 43 | /** 44 | * rcu_hlist_del - removes a node from an RCU hlist 45 | * @n: the node to remove 46 | */ 47 | static inline void rcu_hlist_del(struct rcu_hlist_node *n) 48 | { 49 | rcu_assign_pointer(*n->pprev, n->next); 50 | if (n->next) 51 | n->next->pprev = n->pprev; 52 | } 53 | 54 | /** 55 | * rcu_hlist_empty - returns true if the RCU hlist is empty 56 | * @h: the list head 57 | * @check: proof that a lock is held 58 | * 59 | * If @check is false, must be in an RCU critical section. 
60 | */ 61 | static inline bool rcu_hlist_empty(struct rcu_hlist_head *h, bool check) 62 | { 63 | return rcu_dereference_protected(h->head, check) == NULL; 64 | } 65 | 66 | #define rcu_hlist_entry(n, type, member) container_of(n, type, member) 67 | 68 | #define rcu_hlist_for_each(h, pos, check) \ 69 | for ((pos) = rcu_dereference_protected((h)->head, check); (pos);\ 70 | (pos) = rcu_dereference_protected((pos)->next, check)) 71 | 72 | #define rcu_hlist_for_each_safe(h, pos, tmp, check) \ 73 | for ((pos) = rcu_dereference_protected((h)->head, check); (pos) \ 74 | && ((tmp) = rcu_dereference_protected((pos)->next, check), 1);\ 75 | (pos) = (tmp)) 76 | -------------------------------------------------------------------------------- /inc/runtime/runtime.h: -------------------------------------------------------------------------------- 1 | /* 2 | * runtime.h - runtime initialization and metrics 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | /* main initialization */ 13 | typedef int (*initializer_fn_t)(void); 14 | 15 | extern int runtime_set_initializers(initializer_fn_t global_fn, 16 | initializer_fn_t perthread_fn, 17 | initializer_fn_t late_fn); 18 | extern int runtime_init(const char *cfgpath, thread_fn_t main_fn, void *arg); 19 | 20 | 21 | extern struct runtime_info *runtime_info; 22 | 23 | /** 24 | * runtime_queue_us - returns the us of packet queueing delay + runtime queueing 25 | * delay 26 | */ 27 | static inline uint64_t runtime_queue_us(void) 28 | { 29 | return ACCESS_ONCE(runtime_info->congestion.delay_us); 30 | } 31 | 32 | /** 33 | * runtime_load - returns the current CPU usage (number of cores) 34 | */ 35 | static inline float runtime_load(void) 36 | { 37 | return ACCESS_ONCE(runtime_info->congestion.load); 38 | } 39 | 40 | /** 41 | * runtime_active_cores - returns the number of currently active cores 42 | * 43 | */ 44 | static inline int runtime_active_cores(void) 45 | { 46 | extern atomic_t runningks; 47 | return 
atomic_read(&runningks); 48 | } 49 | 50 | /** 51 | * runtime_max_cores - returns the maximum number of cores 52 | * 53 | * The runtime could be given at most this number of cores by the IOKernel. 54 | */ 55 | static inline int runtime_max_cores(void) 56 | { 57 | extern unsigned int maxks; 58 | return maxks; 59 | } 60 | 61 | /** 62 | * runtime_guaranteed_cores - returns the guaranteed number of cores 63 | * 64 | * The runtime will get at least this number of cores by the IOKernel if it 65 | * requires them. 66 | */ 67 | static inline int runtime_guaranteed_cores(void) 68 | { 69 | extern unsigned int guaranteedks; 70 | return guaranteedks; 71 | } 72 | -------------------------------------------------------------------------------- /inc/runtime/smalloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * smalloc.h - malloc() based on the SLAB and thread-local item caches 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include 10 | 11 | #define __smalloc_attr __malloc __assume_aligned(16) 12 | 13 | extern void *smalloc(size_t size) __smalloc_attr; 14 | extern void *__szalloc(size_t size) __smalloc_attr; 15 | extern void sfree(void *item); 16 | 17 | /** 18 | * szalloc - allocates zeroed memory 19 | * @size: the size of the item 20 | * 21 | * Returns an item or NULL if out of memory. 22 | */ 23 | static __always_inline void *szalloc(size_t size) 24 | { 25 | if (__builtin_constant_p(size)) { 26 | void *item = smalloc(size); 27 | if (unlikely(!item)) 28 | return NULL; 29 | memset(item, 0, size); 30 | return item; 31 | } 32 | return __szalloc(size); 33 | } 34 | 35 | /** 36 | * smalloc_array - allocates a contiguous array of items 37 | * @n: the number of items 38 | * @size: the size of each item 39 | * 40 | * Returns an item array, or NULL if out of memory. 
41 | */ 42 | static __always_inline void *smalloc_array(size_t n, size_t size) 43 | { 44 | return smalloc(n * size); 45 | } 46 | -------------------------------------------------------------------------------- /inc/runtime/storage.h: -------------------------------------------------------------------------------- 1 | /* 2 | * storage.h - Storage 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | extern int storage_write(const void *payload, uint64_t lba, uint32_t lba_count); 10 | extern int storage_read(void *dest, uint64_t lba, uint32_t lba_count); 11 | 12 | 13 | 14 | /* 15 | * storage_block_size - get the size of a block from the nvme device 16 | */ 17 | static inline uint32_t storage_block_size(void) 18 | { 19 | extern uint32_t block_size; 20 | return block_size; 21 | } 22 | 23 | /* 24 | * storage_num_blocks - gets the number of blocks from the nvme device 25 | */ 26 | static inline uint64_t storage_num_blocks(void) 27 | { 28 | extern uint64_t num_blocks; 29 | return num_blocks; 30 | } 31 | -------------------------------------------------------------------------------- /inc/runtime/tcp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * tcp.h - TCP sockets 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | struct tcpqueue; 12 | typedef struct tcpqueue tcpqueue_t; 13 | struct tcpconn; 14 | typedef struct tcpconn tcpconn_t; 15 | 16 | extern int tcp_dial(struct netaddr laddr, struct netaddr raddr, 17 | tcpconn_t **c_out); 18 | extern int tcp_dial_nonblocking(struct netaddr laddr, struct netaddr raddr, 19 | tcpconn_t **c_out); 20 | extern int tcp_dial_affinity(uint32_t affinity, struct netaddr raddr, 21 | tcpconn_t **c_out); 22 | extern int tcp_dial_conn_affinity(tcpconn_t *in, struct netaddr raddr, 23 | tcpconn_t **c_out); 24 | 25 | extern void tcp_set_nonblocking(tcpconn_t *c, bool nonblocking); 26 | 27 | extern int tcp_listen(struct netaddr laddr, int backlog, tcpqueue_t **q_out); 28 | 
extern int tcp_accept(tcpqueue_t *q, tcpconn_t **c_out); 29 | extern void tcp_qshutdown(tcpqueue_t *q); 30 | extern void tcp_qclose(tcpqueue_t *q); 31 | extern void tcpq_set_nonblocking(tcpqueue_t *q, bool nonblocking); 32 | extern struct netaddr tcpq_local_addr(tcpqueue_t *q); 33 | extern int tcpq_backlog(tcpqueue_t *q); 34 | extern struct netaddr tcp_local_addr(tcpconn_t *c); 35 | extern struct netaddr tcp_remote_addr(tcpconn_t *c); 36 | extern int tcp_get_status(tcpconn_t *c); 37 | extern ssize_t tcp_read(tcpconn_t *c, void *buf, size_t len); 38 | extern ssize_t tcp_write(tcpconn_t *c, const void *buf, size_t len); 39 | extern ssize_t tcp_readv(tcpconn_t *c, const struct iovec *iov, int iovcnt); 40 | extern ssize_t tcp_writev(tcpconn_t *c, const struct iovec *iov, int iovcnt); 41 | extern int tcp_shutdown(tcpconn_t *c, int how); 42 | extern void tcp_abort(tcpconn_t *c); 43 | extern void tcp_close(tcpconn_t *c); 44 | -------------------------------------------------------------------------------- /inc/runtime/thread.h: -------------------------------------------------------------------------------- 1 | /* 2 | * thread.h - support for user-level threads 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | struct thread; 14 | typedef void (*thread_fn_t)(void *arg); 15 | typedef struct thread thread_t; 16 | 17 | 18 | /* 19 | * Low-level routines, these are helpful for bindings and synchronization 20 | * primitives. 
/* signature of a timer handler; @arg is the value given to timer_init() */
typedef void (*timer_fn_t)(unsigned long arg);

struct kthread;

struct timer_entry {
	bool		armed;
	bool		executing;
	bool		cancelling;
	unsigned int	idx;
	timer_fn_t	fn;
	unsigned long	arg;
	struct kthread	*localk;
};


/*
 * Low-level API
 */

/**
 * timer_init - initializes a timer
 * @e: the timer entry to initialize
 * @fn: the timer handler (called when the timer fires)
 * @arg: an argument passed to the timer handler
 *
 * All three state flags (armed, executing, cancelling) start out false so
 * that an entry living in non-zeroed memory never exposes an indeterminate
 * flag to later readers. @idx is left untouched here.
 */
static inline void
timer_init(struct timer_entry *e, timer_fn_t fn, unsigned long arg)
{
	e->armed = false;
	e->executing = false;
	e->cancelling = false;
	e->fn = fn;
	e->arg = arg;
	e->localk = NULL;
}
10 | #include "defs.h" 11 | 12 | #define DEVICE_NAME_MAX 64 13 | extern char device_name[DEVICE_NAME_MAX]; 14 | 15 | extern double device_us_per_cycle; 16 | extern uint32_t curr_hw_time; 17 | extern void *hca_core_clock; 18 | 19 | static inline bool is_hw_timestamp_enabled() 20 | { 21 | return !cfg.no_hw_qdel; 22 | } 23 | 24 | static inline void hw_timestamp_update(void) 25 | { 26 | if (cfg.no_hw_qdel) 27 | return; 28 | 29 | /* read the low 32 bits of the hardware counter */ 30 | curr_hw_time = be32toh(mmio_read32_be(hca_core_clock + 4)); 31 | } 32 | 33 | static inline uint64_t hw_timestamp_delay_us(struct mlx5_cqe64 *cqe) 34 | { 35 | double us; 36 | uint32_t hwstamp = (uint32_t)be64toh(ACCESS_ONCE(cqe->timestamp)); 37 | 38 | if (wraps_lte(hwstamp, curr_hw_time)) { 39 | us = (double)(curr_hw_time - hwstamp) * device_us_per_cycle; 40 | return us; 41 | } 42 | return 0; 43 | } 44 | 45 | #else 46 | 47 | struct mlx5_cqe64; 48 | 49 | static inline bool is_hw_timestamp_enabled() 50 | { 51 | return false; 52 | } 53 | static inline void hw_timestamp_update(void) {} 54 | static inline uint64_t hw_timestamp_delay_us(struct mlx5_cqe64 *cqe) 55 | { 56 | return 0; 57 | } 58 | 59 | static inline int nl_register_mac_address(struct eth_addr *mac) 60 | { 61 | return 0; 62 | } 63 | 64 | static inline int nl_remove_mac_address(struct eth_addr *mac) 65 | { 66 | return 0; 67 | } 68 | 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /iokernel/ias_ts.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ias_ts.c - the time sharing controller 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include "defs.h" 9 | #include "sched.h" 10 | #include "ksched.h" 11 | #include "ias.h" 12 | 13 | /* statistics */ 14 | uint64_t ias_ts_yield_count; 15 | 16 | /** 17 | * ias_ts_poll - runs the time sharing controller 18 | */ 19 | void ias_ts_poll(void) 20 | { 21 | struct thread *th; 22 | struct ias_data *sd; 23 
| struct thread_metrics *m; 24 | unsigned int core, tmp; 25 | 26 | sched_for_each_allowed_core(core, tmp) { 27 | sd = cores[core]; 28 | if (!sd || sd->quantum_us == 0) 29 | continue; 30 | th = sched_get_thread_on_core(core); 31 | if (!th) 32 | continue; 33 | 34 | m = &th->metrics; 35 | if (!m->work_pending || m->uthread_elapsed_us < sd->quantum_us) 36 | continue; 37 | 38 | ias_ts_yield_count++; 39 | sched_yield_on_core(core); 40 | } 41 | } 42 | 43 | void ias_core_ts_poll(void) 44 | { 45 | struct ias_data *sd, *sd_next; 46 | int ret, cnt; 47 | 48 | /* if there are congested LCs, there are no BEs running. */ 49 | if (congested_lc_procs_nr > 0) 50 | return; 51 | 52 | cnt = 0; 53 | 54 | /* Check BEs with 0 cores running */ 55 | list_for_each_safe(&congested_procs[1], sd, sd_next, congested_link) { 56 | if (sd->threads_active > 0 || sd->threads_limit == 0) 57 | continue; 58 | 59 | ret = ias_add_kthread(sd); 60 | if (ret) 61 | break; 62 | 63 | if (++cnt == sched_cores_nr) 64 | return; 65 | } 66 | 67 | } -------------------------------------------------------------------------------- /iokernel/ksched.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksched.c - an interface to the ksched kernel module 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "ksched.h" 15 | 16 | /* a file descriptor handle to the ksched kernel module */ 17 | int ksched_fd; 18 | /* the number of pending interrupts */ 19 | int ksched_count; 20 | /* the number of pending pmc-sampling interrupts */ 21 | int ksched_pmc_count; 22 | /* whether UINTR is enabled */ 23 | bool ksched_has_uintr; 24 | /* most recent core with an enqueued interrupt */ 25 | int last_intr_core; 26 | /* the shared memory region with the kernel module */ 27 | struct ksched_shm_cpu *ksched_shm; 28 | /* the set of pending cores to send interrupts to */ 29 | cpu_set_t ksched_set; 30 | /* the generation number 
for each core */ 31 | unsigned int ksched_gens[NCPU]; 32 | 33 | /** 34 | * ksched_uintr_init - initializes UINTR using ksched kernel 35 | * 36 | * Must be called on the dataplane core. 37 | * 38 | * Returns 0 if successful. 39 | */ 40 | void ksched_uintr_init(void) 41 | { 42 | int ret; 43 | ret = ioctl(ksched_fd, KSCHED_IOC_UINTR_SETUP_ADMIN, 0); 44 | ksched_has_uintr = (ret == 0); 45 | log_info("UINTR: %s", ksched_has_uintr ? "enabled" : "disabled"); 46 | } 47 | 48 | /** 49 | * ksched_init - initializes the ksched kernel module interface 50 | * 51 | * Returns 0 if successful. 52 | */ 53 | int ksched_init(void) 54 | { 55 | char *ksched_addr; 56 | int i; 57 | 58 | /* first open the file descriptor */ 59 | ksched_fd = open("/dev/ksched", O_RDWR); 60 | if (ksched_fd < 0) { 61 | log_err("Could not find ksched kernel module (%s). Please ensure that " 62 | "ksched is compiled and inserted (see README for more details)", 63 | strerror(errno)); 64 | return -errno; 65 | } 66 | 67 | if (ioctl(ksched_fd, KSCHED_IOC_GET_SCHED_API_VER) != KSCHED_SCHED_API_VER) { 68 | log_err("ksched module API mismatch"); 69 | return -1; 70 | } 71 | 72 | /* then map the shared memory region with the kernel */ 73 | ksched_addr = mmap(NULL, sizeof(struct ksched_shm_cpu) * NCPU, 74 | PROT_READ | PROT_WRITE, MAP_SHARED, ksched_fd, 0); 75 | if (ksched_addr == MAP_FAILED) 76 | return -errno; 77 | 78 | /* then initialize the generation numbers */ 79 | ksched_shm = (struct ksched_shm_cpu *)ksched_addr; 80 | for (i = 0; i < NCPU; i++) { 81 | ksched_gens[i] = load_acquire(&ksched_shm[i].last_gen); 82 | ksched_idle_hint(i, 0); 83 | } 84 | 85 | return 0; 86 | } 87 | -------------------------------------------------------------------------------- /iokernel/pcm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* Declarations of relevant functions in patched PCM library */ 4 | extern uint32_t pcm_caladan_get_cas_count(uint32_t channel); 5 | extern 
uint32_t pcm_caladan_get_active_channel_count(void); 6 | extern int pcm_caladan_init(int socket); 7 | 8 | -------------------------------------------------------------------------------- /iokernel/pmc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * pmc.h - useful definitions for Intel Performance Counters 3 | */ 4 | 5 | #pragma once 6 | 7 | #define PMC_ESEL_UMASK_SHIFT 8 8 | #define PMC_ESEL_CMASK_SHIFT 24 9 | #define PMC_ESEL_ENTRY(event, umask, cmask) \ 10 | (((event) & 0xFFUL) | \ 11 | (((umask) & 0xFFUL) << PMC_ESEL_UMASK_SHIFT) | \ 12 | (((cmask) & 0xFFUL) << PMC_ESEL_CMASK_SHIFT)) 13 | #define PMC_ESEL_USR (1ULL << 16) /* User Mode */ 14 | #define PMC_ESEL_OS (1ULL << 17) /* Kernel Mode */ 15 | #define PMC_ESEL_EDGE (1ULL << 18) /* Edge detect */ 16 | #define PMC_ESEL_PC (1ULL << 19) /* Pin control */ 17 | #define PMC_ESEL_INT (1ULL << 20) /* APIC interrupt enable */ 18 | #define PMC_ESEL_ANY (1ULL << 21) /* Any thread */ 19 | #define PMC_ESEL_ENABLE (1ULL << 22) /* Enable counters */ 20 | #define PMC_ESEL_INV (1ULL << 23) /* Invert counter mask */ 21 | 22 | /* architectural performance counters (works on all Intel CPUs) */ 23 | #define PMC_ARCH_CORE_CYCLES PMC_ESEL_ENTRY(0x3C, 0x00, 0) 24 | #define PMC_ARCH_INSTR_RETIRED PMC_ESEL_ENTRY(0xC0, 0x00, 0) 25 | #define PMC_ARCH_REF_CYCLES PMC_ESEL_ENTRY(0x3C, 0x01, 0) 26 | #define PMC_ARCH_LLC_REF PMC_ESEL_ENTRY(0x2E, 0x4F, 0) 27 | #define PMC_ARCH_LLC_MISSES PMC_ESEL_ENTRY(0x2E, 0x41, 0) 28 | #define PMC_ARCH_BRANCHES PMC_ESEL_ENTRY(0xC4, 0x00, 0) 29 | #define PMC_ARCH_BRANCH_MISSES PMC_ESEL_ENTRY(0xC5, 0x00, 0) 30 | 31 | /* this performance counter measures LLC misses as a proxy for mem bandwidth */ 32 | #define PMC_LLC_MISSES (PMC_ARCH_LLC_MISSES | PMC_ESEL_USR | PMC_ESEL_OS | \ 33 | PMC_ESEL_ENABLE) 34 | #define PMC_LLC_MISSES_ANY (PMC_ARCH_LLC_MISSES | PMC_ESEL_USR | PMC_ESEL_OS | \ 35 | PMC_ESEL_ANY | PMC_ESEL_ENABLE) 36 | 
/*
 * ref.h - generic support for reference counts
 *
 * This implementation is inspired by the following paper:
 * Kroah-Hartman, Greg, kobjects and krefs. Linux Symposium 2004
 *
 * This version doesn't use atomics, so a given ref must not be updated
 * concurrently without external serialization.
 */

struct ref {
	int cnt;
};

/**
 * ref_init - initializes the reference count to one
 * @ref: the kref
 */
static inline void
ref_init(struct ref *ref)
{
	ref->cnt = 1;
}

/**
 * ref_get - increments the reference count (not atomic)
 * @ref: the kref
 *
 * The caller must already hold a reference; taking one on an object whose
 * count has dropped to zero is a bug and trips the assertion.
 */
static inline void
ref_get(struct ref *ref)
{
	assert(ref->cnt > 0);
	ref->cnt++;
}

/**
 * ref_put - decrements the reference count (not atomic), releasing the object
 * when it reaches zero
 * @ref: the ref
 * @release: a pointer to the release function
 *
 * @release must be non-NULL; it is invoked with @ref only when this call
 * drops the last reference.
 */
static inline void
ref_put(struct ref *ref, void (*release)(struct ref *ref))
{
	assert(release);
	/* catch an unbalanced put before the count goes negative */
	assert(ref->cnt > 0);
	if (--ref->cnt == 0)
		release(ref);
}
"COMPLETION_DRAINED", 27 | "COMPLETION_ENQUEUED", 28 | "LOOPS", 29 | "TX_PULLED", 30 | "TX_BACKPRESSURE", 31 | "SCHED_RUN", 32 | "PREEMPT", 33 | "RX_REFILL", 34 | "DIRECTPATH_EVENTS", 35 | "DMA_ENQUEUE", 36 | "DMA_DEQUEUE", 37 | "DMA_SUBMIT", 38 | }; 39 | 40 | BUILD_ASSERT(ARRAY_SIZE(stat_names) == NR_STATS); 41 | 42 | static void print_stats(void) 43 | { 44 | int i; 45 | uint64_t now, cur_stats[NR_STATS]; 46 | static uint64_t last_stats[NR_STATS]; 47 | 48 | barrier(); 49 | now = rdtsc(); 50 | for (i = 0; i < NR_STATS; i++) 51 | cur_stats[i] = ACCESS_ONCE(stats[i]); 52 | barrier(); 53 | 54 | printf("-----------------\n"); 55 | 56 | for (i = 0; i < NR_STATS; i++) { 57 | printf("%lu %s %lu\n", now, stat_names[i], 58 | cur_stats[i] - last_stats[i]); 59 | last_stats[i] = cur_stats[i]; 60 | } 61 | 62 | fflush(stdout); 63 | } 64 | 65 | static void *print_stats_thread(void *arg) 66 | { 67 | cpu_set_t cpuset; 68 | int ret; 69 | 70 | CPU_ZERO(&cpuset); 71 | CPU_SET(sched_ctrl_core, &cpuset); 72 | 73 | ret = sched_setaffinity(thread_gettid(), sizeof(cpu_set_t), &cpuset); 74 | if (ret < 0) { 75 | log_warn("log: failed to pin to contorl core with err %d", errno); 76 | return NULL; 77 | } 78 | 79 | while (true) { 80 | print_stats(); 81 | sleep(1); 82 | } 83 | } 84 | 85 | int stats_init(void) 86 | { 87 | pthread_t tid; 88 | 89 | return pthread_create(&tid, NULL, print_stats_thread, NULL); 90 | } 91 | 92 | -------------------------------------------------------------------------------- /ksched/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | -------------------------------------------------------------------------------- /ksched/Kbuild: -------------------------------------------------------------------------------- 1 | obj-m += ksched.o fake_idle.o 2 | 3 | ksched-objs := ksched_main.o uintr.o 4 | -------------------------------------------------------------------------------- /ksched/Makefile: 
-------------------------------------------------------------------------------- 1 | KDIR ?= /lib/modules/$(shell uname -r)/build 2 | BUILD_DIR ?= $(PWD)/build 3 | BUILD_DIR_MAKEFILE ?= $(PWD)/build/Makefile 4 | 5 | default: $(BUILD_DIR_MAKEFILE) 6 | make -C $(KDIR) M=$(BUILD_DIR) src=$(PWD) modules 7 | 8 | $(BUILD_DIR): 9 | mkdir -p "$@" 10 | 11 | $(BUILD_DIR_MAKEFILE): $(BUILD_DIR) 12 | touch "$@" 13 | 14 | clean: 15 | make -C $(KDIR) M=$(BUILD_DIR) src=$(PWD) clean 16 | -------------------------------------------------------------------------------- /ksched/defs.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "ksched.h" 6 | #include "uintr_hw.h" 7 | 8 | struct ksched_percpu { 9 | unsigned int last_gen; 10 | local_t busy; 11 | u64 last_sel; 12 | struct task_struct *running_task; 13 | 14 | struct uintr_percpu uintr; 15 | }; 16 | 17 | extern __read_mostly struct ksched_shm_cpu *shm; 18 | DECLARE_PER_CPU(struct ksched_percpu, kp); -------------------------------------------------------------------------------- /ksched/fake_idle.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | static struct kobject *kobj; 9 | static int unloaded; 10 | static int refcnt_for_unload; 11 | 12 | static int __cpuidle fake_idle(struct cpuidle_device *dev, 13 | struct cpuidle_driver *drv, int index) 14 | { 15 | return index; 16 | } 17 | 18 | static struct cpuidle_driver fake_idle_driver = { 19 | .name = "fake_idle", 20 | .owner = THIS_MODULE, 21 | .states = { 22 | { 23 | .enter = fake_idle, 24 | .exit_latency = 1, 25 | .target_residency = 1, 26 | .name = "", 27 | .desc = "", 28 | }, 29 | }, 30 | .safe_state_index = 0, 31 | .state_count = 1, 32 | }; 33 | 34 | static ssize_t unload_store(struct kobject *kobj, struct kobj_attribute *attr, 35 | const char *buf, size_t count) { 36 | if 
(unloaded) return -ENODEV; 37 | if (atomic_read(&THIS_MODULE->refcnt) + 1 != refcnt_for_unload) return -EBUSY; 38 | unloaded = 1; 39 | cpuidle_unregister(&fake_idle_driver); 40 | return count; 41 | } 42 | 43 | static ssize_t unload_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf){ 44 | return sprintf(buf, "%s\n", unloaded ? "unloaded" : "loaded"); 45 | } 46 | 47 | static struct kobj_attribute unload_attr = __ATTR(unload, 0644, unload_show, unload_store); 48 | 49 | static int __init create_sysfs_entry(void) 50 | { 51 | int err; 52 | 53 | kobj = kobject_create_and_add("fake_idle", NULL); 54 | if (kobj == NULL) 55 | return -ENOMEM; 56 | err = sysfs_create_file(kobj, &unload_attr.attr); 57 | if (err) 58 | kobject_put(kobj); 59 | 60 | return err; 61 | } 62 | 63 | static int __init fake_idle_init(void) 64 | { 65 | int err; 66 | 67 | 68 | err = create_sysfs_entry(); 69 | if (err) 70 | return err; 71 | 72 | err = cpuidle_register(&fake_idle_driver, NULL); 73 | if (err) 74 | kobject_put(kobj); 75 | 76 | refcnt_for_unload = atomic_read(&THIS_MODULE->refcnt); 77 | return err; 78 | } 79 | 80 | static void __exit fake_idle_exit(void) 81 | { 82 | kobject_put(kobj); 83 | } 84 | 85 | module_init(fake_idle_init); 86 | module_exit(fake_idle_exit); 87 | 88 | MODULE_LICENSE("GPL"); 89 | -------------------------------------------------------------------------------- /ksched/ksched.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ksched.h - the UAPI for ksched and its ioctl's 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | /* 11 | * NOTE: normally character devices are dynamically allocated, but for 12 | * convenience we use 280. 13 | */ 14 | #define KSCHED_MAJOR 280 15 | #define KSCHED_MINOR 0 16 | 17 | /* 18 | * Bump these version numbers when the API for the scheduler or programs is 19 | * modified. 
20 | */ 21 | #define KSCHED_SCHED_API_VER 2 22 | #define KSCHED_USER_API_VER 2 23 | 24 | struct ksched_intr_req { 25 | size_t len; 26 | const void __user *mask; 27 | }; 28 | 29 | struct uintr_upid { 30 | union { 31 | struct { 32 | __u8 status; /* bit 0: ON, bit 1: SN, bit 2-7: reserved */ 33 | __u8 reserved1; /* Reserved */ 34 | __u8 nv; /* Notification vector */ 35 | __u8 reserved2; /* Reserved */ 36 | __u32 ndst; /* Notification destination */ 37 | } nc __packed; /* Notification control */ 38 | long unsigned int word_val; 39 | }; 40 | __u64 puir; /* Posted user interrupt requests */ 41 | } __aligned(64); 42 | 43 | struct ksched_shm_cpu { 44 | /* written by userspace */ 45 | unsigned int gen; 46 | pid_t tid; 47 | unsigned int mwait_hint; 48 | unsigned int sig; 49 | unsigned int signum; 50 | unsigned int pmc; 51 | __u64 pmcsel; 52 | 53 | /* written by kernelspace */ 54 | unsigned int busy; 55 | unsigned int last_gen; 56 | __u64 pmcval; 57 | __u64 pmctsc; 58 | 59 | /* extra space for future features (and cache alignment) */ 60 | unsigned long rsv[1]; 61 | 62 | struct uintr_upid upid; 63 | }; 64 | 65 | #define KSCHED_MAGIC 0xF0 66 | #define KSCHED_IOC_MAXNR 9 67 | 68 | #define KSCHED_IOC_START _IO(KSCHED_MAGIC, 1) 69 | #define KSCHED_IOC_PARK _IO(KSCHED_MAGIC, 2) 70 | #define KSCHED_IOC_INTR _IOW(KSCHED_MAGIC, 3, struct ksched_intr_req) 71 | #define KSCHED_IOC_UINTR_MULTICAST _IOW(KSCHED_MAGIC, 4, struct ksched_intr_req) 72 | #define KSCHED_IOC_UINTR_SETUP_USER _IO(KSCHED_MAGIC, 5) 73 | #define KSCHED_IOC_UINTR_SETUP_ADMIN _IO(KSCHED_MAGIC, 6) 74 | #define KSCHED_IOC_GETTID _IO(KSCHED_MAGIC, 7) 75 | #define KSCHED_IOC_GET_USER_API_VER _IO(KSCHED_MAGIC, 8) 76 | #define KSCHED_IOC_GET_SCHED_API_VER _IO(KSCHED_MAGIC, 9) 77 | -------------------------------------------------------------------------------- /ksched/uintr.h: -------------------------------------------------------------------------------- 1 | // uintr.h 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 
#include 8 | 9 | #include "uintr_hw.h" 10 | 11 | /* Use KVM's posted interrupt vector */ 12 | #define UIPI_APIC_VECTOR POSTED_INTR_WAKEUP_VECTOR 13 | 14 | struct uintr_ctx { 15 | unsigned long handler; 16 | struct kref refcount; 17 | bool is_admin; 18 | 19 | /* sender UITT table */ 20 | struct uintr_uitt_entry uitt[]; 21 | }; 22 | 23 | struct uintr_xstate { 24 | struct xregs_state xregs; 25 | struct uintr_state uintr; 26 | } __packed __aligned(64); 27 | 28 | struct uintr_percpu { 29 | struct task_struct *assigned_task; 30 | struct uintr_ctx *assigned_ctx; 31 | 32 | bool state_loaded; 33 | bool is_admin_ctx; 34 | struct uintr_xstate cur_xstate; 35 | }; 36 | 37 | extern void uintr_cleanup_core(struct uintr_percpu *p, int cpu); 38 | extern void uintr_assign_core(struct uintr_ctx *ctx, u64 stack); 39 | extern long uintr_multicast(struct ksched_intr_req __user *ureq); 40 | 41 | extern void uintr_deliver_ipi(struct uintr_percpu *p); 42 | 43 | extern int uintr_init(void); 44 | extern void uintr_exit(void); 45 | extern long uintr_setup_admin(struct file *filp); 46 | extern long uintr_setup_user(struct file *filp, unsigned long handler); 47 | extern void uintr_file_release(struct file *filp); 48 | 49 | extern bool uintr_enabled; 50 | 51 | static inline struct uintr_ctx *to_uintr_ctx(struct file *filp) 52 | { 53 | return (struct uintr_ctx *)filp->private_data; 54 | } 55 | 56 | static inline bool uintr_active(struct uintr_percpu *p) 57 | { 58 | return p->assigned_ctx != NULL; 59 | } 60 | 61 | static inline void uintr_signal_self(void) 62 | { 63 | apic->send_IPI_self(UIPI_APIC_VECTOR); 64 | } -------------------------------------------------------------------------------- /ksched/uintr_hw.h: -------------------------------------------------------------------------------- 1 | 2 | // UINTR hardware definitions 3 | 4 | #pragma once 5 | 6 | #define X86_FEATURE_UINTR (18*32+ 5) /* User Interrupts support */ 7 | #define DISABLE_UINTR (1 << (X86_FEATURE_UINTR & 31)) 8 | 9 | /* User 
Interrupt interface */ 10 | #define MSR_IA32_UINTR_RR 0x985 11 | #define MSR_IA32_UINTR_HANDLER 0x986 12 | #define MSR_IA32_UINTR_STACKADJUST 0x987 13 | #define MSR_IA32_UINTR_MISC 0x988 /* 39:32-UINV, 31:0-UITTSZ */ 14 | #define MSR_IA32_UINTR_PD 0x989 15 | #define MSR_IA32_UINTR_TT 0x98a 16 | 17 | #define X86_CR4_UINTR_BIT 25 /* enable User Interrupts support */ 18 | #define X86_CR4_UINTR _BITUL(X86_CR4_UINTR_BIT) 19 | 20 | #define UINTR_UPID_STATUS_ON 0x0 /* Outstanding notification */ 21 | #define UINTR_UPID_STATUS_SN 0x1 /* Suppressed notification */ 22 | 23 | #define UINTR_UITT_VALID_BIT 0x0 24 | 25 | /* 26 | * State component 14 is supervisor state used for User Interrupts state. 27 | * The size of this state is 48 bytes 28 | */ 29 | struct uintr_state { 30 | __u64 handler; 31 | __u64 stack_adjust; 32 | struct { 33 | __u32 uitt_size; 34 | __u8 uinv; 35 | __u8 pad1; 36 | __u8 pad2; 37 | __u8 pad3:7; 38 | __u8 uif:1; 39 | } __packed misc; 40 | __u64 upid_addr; 41 | __u64 uirr; 42 | __u64 uitt_addr; 43 | } __packed; 44 | 45 | /* User Interrupt Target Table Entry (UITTE) */ 46 | struct uintr_uitt_entry { 47 | __u8 valid; /* bit 0: valid, bit 1-7: reserved */ 48 | __u8 user_vec; 49 | __u8 reserved[6]; 50 | __u64 target_upid_addr; 51 | } __packed __aligned(16); 52 | 53 | #define XFEATURE_UINTR 14 54 | #define XFEATURE_MASK_UINTR (1 << XFEATURE_UINTR) 55 | -------------------------------------------------------------------------------- /runtime/net/directpath/defs.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "../defs.h" 11 | 12 | 13 | #define RQ_NUM_DESC 1024 14 | #define SQ_NUM_DESC 128 15 | #define SQ_CLEAN_THRESH RUNTIME_RX_BATCH_SIZE 16 | #define SQ_CLEAN_MAX SQ_CLEAN_THRESH 17 | 18 | /* space for the mbuf struct */ 19 | #define RX_BUF_HEAD \ 20 | (align_up(sizeof(struct mbuf), 2 * CACHE_LINE_SIZE)) 21 | /* some NICs 
/* identifier stamped on our echo requests and matched on incoming replies */
static uint16_t ping_id;

/**
 * net_ping_init - chooses the ICMP identifier for this process's pings
 *
 * rand() alone yields the same value in every process unless something has
 * called srand() beforehand (NOTE(review): no seeding is visible in this
 * file - confirm), which would make ids collide across runtimes. The current
 * wall-clock time is mixed in so that separate processes pick different ids.
 *
 * Returns 0; this function cannot fail.
 */
int net_ping_init(void)
{
	struct timeval now = { 0, 0 };

	gettimeofday(&now, NULL);
	ping_id = (uint16_t)((unsigned int)rand() ^
			     (unsigned int)now.tv_sec ^
			     (unsigned int)now.tv_usec);
	return 0;
}
/*
 * timeval_subtract - computes out -= in for two timeval structs
 * @out: the minuend; overwritten with the difference
 * @in: the subtrahend
 *
 * The caller must guarantee out >= in. A negative microsecond difference is
 * fixed up by borrowing one second.
 */
static void timeval_subtract(struct timeval *out, const struct timeval *in)
{
	out->tv_usec -= in->tv_usec;
	if (out->tv_usec < 0) {
		/* borrow a second to bring tv_usec back into [0, 1000000) */
		out->tv_usec += 1000000;
		out->tv_sec -= 1;
	}
	out->tv_sec -= in->tv_sec;
}
3 | */ 4 | 5 | #include 6 | 7 | /** 8 | * poll_init - initializes a polling waiter object 9 | * @w: the waiter object to initialize 10 | */ 11 | void poll_init(poll_waiter_t *w) 12 | { 13 | spin_lock_init(&w->lock); 14 | list_head_init(&w->triggered); 15 | w->waiting_th = NULL; 16 | } 17 | 18 | /** 19 | * poll_arm - registers a trigger with a waiter 20 | * @w: the waiter to register with 21 | * @t: the trigger to register 22 | * @data: data to provide when the trigger fires 23 | */ 24 | void poll_arm(poll_waiter_t *w, poll_trigger_t *t, unsigned long data) 25 | { 26 | if (WARN_ON(t->waiter != NULL)) 27 | return; 28 | 29 | t->waiter = w; 30 | t->triggered = false; 31 | t->data = data; 32 | } 33 | 34 | /** 35 | * poll_disarm - unregisters a trigger with a waiter 36 | * @t: the trigger to unregister 37 | */ 38 | void poll_disarm(poll_trigger_t *t) 39 | { 40 | poll_waiter_t *w; 41 | if (WARN_ON(t->waiter == NULL)) 42 | return; 43 | 44 | w = t->waiter; 45 | spin_lock_np(&w->lock); 46 | if (t->triggered) { 47 | list_del(&t->link); 48 | t->triggered = false; 49 | } 50 | spin_unlock_np(&w->lock); 51 | 52 | t->waiter = NULL; 53 | } 54 | 55 | /** 56 | * poll_wait - waits for the next event to trigger 57 | * @w: the waiter to wait on 58 | * 59 | * Returns the data provided to the trigger that fired 60 | */ 61 | unsigned long poll_wait(poll_waiter_t *w) 62 | { 63 | thread_t *th = thread_self(); 64 | poll_trigger_t *t; 65 | 66 | while (true) { 67 | spin_lock_np(&w->lock); 68 | t = list_pop(&w->triggered, poll_trigger_t, link); 69 | if (t) { 70 | spin_unlock_np(&w->lock); 71 | return t->data; 72 | } 73 | w->waiting_th = th; 74 | thread_park_and_unlock_np(&w->lock); 75 | } 76 | } 77 | 78 | /** 79 | * poll_trigger - fires a trigger 80 | * @w: the waiter to wake up (if it is waiting) 81 | * @t: the trigger that fired 82 | */ 83 | void poll_trigger(poll_waiter_t *w, poll_trigger_t *t) 84 | { 85 | thread_t *wth = NULL; 86 | 87 | spin_lock_np(&w->lock); 88 | if (t->triggered) { 89 | 
/* descriptor held open on /dev/cpu_dma_latency; -1 until a target is set */
static int pm_qos_fd = -1;

/**
 * set_latency_target - requests a maximum CPU wakeup latency
 * @target_us: the latency bound to write to the PM QoS file
 *
 * Opens /dev/cpu_dma_latency and writes @target_us to it. The descriptor is
 * intentionally left open. Calling this again after a successful call is a
 * no-op. On any failure a message is printed and the process exits with the
 * error number as its status.
 */
void set_latency_target(int32_t target_us)
{
	ssize_t ret;
	int err;

	if (pm_qos_fd >= 0)
		return;

	pm_qos_fd = open("/dev/cpu_dma_latency", O_RDWR);
	if (pm_qos_fd < 0) {
		/* save errno first: fprintf() is allowed to overwrite it */
		err = errno;
		fprintf(stderr, "Failed to open PM QOS file: %s\n",
			strerror(err));
		exit(err);
	}

	ret = write(pm_qos_fd, &target_us, sizeof(target_us));
	if (ret != (ssize_t)sizeof(target_us)) {
		/* a short write sets no errno, so report it as an I/O error */
		err = ret < 0 ? errno : EIO;
		fprintf(stderr, "Fail to set QOS target: %s\n", strerror(err));
		exit(err);
	}
}
| #include 16 | #include 17 | 18 | typedef uint64_t cycle_t; 19 | 20 | static inline cycle_t rdtsc(void) 21 | { 22 | uint32_t a, d; 23 | asm volatile("rdtsc" : "=a" (a), "=d" (d)); 24 | return ((uint64_t)a) | (((uint64_t)d) << 32); 25 | } 26 | 27 | static inline cycle_t rdtscp(uint32_t *auxp) 28 | { 29 | uint32_t a, d, c; 30 | asm volatile("rdtscp" : "=a" (a), "=d" (d), "=c" (c)); 31 | if (auxp) 32 | *auxp = c; 33 | return ((uint64_t)a) | (((uint64_t)d) << 32); 34 | } 35 | 36 | #define N 1000 37 | static cycle_t results[N]; 38 | static int nr; 39 | 40 | int main(int argc, char *argv[]) 41 | { 42 | cycle_t start, end; 43 | 44 | while (nr < N) { 45 | start = rdtsc(); 46 | end = rdtscp(NULL); 47 | if (end - start > 1000) 48 | results[nr++] = end - start; 49 | } 50 | 51 | std::sort(std::begin(results), std::end(results)); 52 | printf("median: %ld 99th: %ld 99.9th: %ld 99.99th: %ld\n", 53 | results[nr / 2], results[nr * 99 / 100], 54 | results[nr * 999 / 1000], results[nr * 9999 / 10000]); 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /server.config: -------------------------------------------------------------------------------- 1 | # an example runtime config file 2 | host_addr 192.168.1.3 3 | host_netmask 255.255.255.0 4 | host_gateway 192.168.1.1 5 | runtime_kthreads 4 6 | runtime_guaranteed_kthreads 4 7 | runtime_priority lc 8 | -------------------------------------------------------------------------------- /shim/Makefile: -------------------------------------------------------------------------------- 1 | ROOT_PATH=../ 2 | include $(ROOT_PATH)/build/shared.mk 3 | 4 | # handy for debugging 5 | print-% : ; @echo $* = $($*) 6 | 7 | # libshim.a - the shenango shim library 8 | shim_src = $(wildcard *.c) 9 | shim_obj = $(shim_src:.c=.o) 10 | 11 | # must be first 12 | all: libshim.a 13 | 14 | libshim.a: $(shim_obj) 15 | $(AR) rcs $@ $^ 16 | 17 | # general build rules for all targets 18 | src = $(shim_src) 19 
| obj = $(src:.c=.o) 20 | dep = $(obj:.o=.d) 21 | 22 | ifneq ($(MAKECMDGOALS),clean) 23 | -include $(dep) # include all dep files in the makefile 24 | endif 25 | 26 | # rule to generate a dep file by using the C preprocessor 27 | # (see man cpp for details on the -MM and -MT options) 28 | %.d: %.c 29 | @$(CC) $(CFLAGS) $< -MM -MT $(@:.d=.o) >$@ 30 | %.o: %.c 31 | $(CC) $(CFLAGS) -c $< -o $@ 32 | 33 | .PHONY: clean 34 | clean: 35 | rm -f $(obj) $(dep) libshim.a 36 | -------------------------------------------------------------------------------- /shim/README: -------------------------------------------------------------------------------- 1 | 2 | To use, compile libshim.a and the target application with it. Link the dynamic loader library (-ldl) and use the linker flag '-Wl,--wrap=main' to wrap main. 3 | Make sure the application doesn't use static initializers for pthread mutexes etc. 4 | -------------------------------------------------------------------------------- /shim/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | static inline bool shim_active(void) 9 | { 10 | return base_init_done && thread_self() != NULL; 11 | } 12 | 13 | static inline void shim_preempt_enable(void) 14 | { 15 | if (likely(shim_active())) 16 | preempt_enable(); 17 | } 18 | 19 | static inline void shim_preempt_disable(void) 20 | { 21 | if (likely(shim_active())) 22 | preempt_disable(); 23 | } 24 | 25 | 26 | static inline void shim_spin_unlock_np(spinlock_t *l) 27 | { 28 | spin_unlock(l); 29 | shim_preempt_enable(); 30 | } 31 | 32 | static inline void shim_spin_lock_np(spinlock_t *l) 33 | { 34 | shim_preempt_disable(); 35 | spin_lock(l); 36 | } 37 | 38 | #define NOTSELF(name, ...) 
\ 39 | if (unlikely(!shim_active())) { \ 40 | static typeof(name) *fn; \ 41 | if (!fn) { \ 42 | fn = dlsym(RTLD_NEXT, #name); \ 43 | BUG_ON(!fn); \ 44 | } \ 45 | return fn(__VA_ARGS__); \ 46 | } 47 | -------------------------------------------------------------------------------- /shim/entry.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | 6 | int __real_main(int, char **); 7 | 8 | static int main_argc, main_ret; 9 | static char **main_argv; 10 | 11 | static void runtime_entry(void *arg) 12 | { 13 | main_ret = __real_main(main_argc, main_argv); 14 | } 15 | 16 | int __weak __wrap_main(int argc, char **argv) 17 | { 18 | int ret; 19 | 20 | if (argc < 2) { 21 | fprintf(stderr, "Error: missing shenango config argument\n"); 22 | return 0; 23 | } 24 | 25 | char *cfg = argv[1]; 26 | argv[1] = argv[0]; 27 | main_argv = &argv[1]; 28 | main_argc = argc - 1; 29 | 30 | ret = runtime_init(cfg, runtime_entry, NULL); 31 | if (ret) { 32 | fprintf(stderr, "failed to start runtime\n"); 33 | return ret; 34 | } 35 | 36 | return main_ret; 37 | } 38 | -------------------------------------------------------------------------------- /shim/sleep.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "common.h" 8 | 9 | int usleep(useconds_t usec) 10 | { 11 | NOTSELF(usleep, usec); 12 | timer_sleep(usec); 13 | return 0; 14 | } 15 | 16 | unsigned int sleep(unsigned int seconds) 17 | { 18 | NOTSELF(sleep, seconds); 19 | timer_sleep(seconds * ONE_SECOND); 20 | return 0; 21 | } 22 | 23 | int nanosleep(const struct timespec *req, struct timespec *rem) 24 | { 25 | NOTSELF(nanosleep, req, rem); 26 | 27 | timer_sleep(req->tv_sec * ONE_SECOND + req->tv_nsec / 1000); 28 | 29 | if (rem) { 30 | rem->tv_sec = 0; 31 | rem->tv_nsec = 0; 32 | } 33 | 34 | return 0; 35 | } 
-------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | test_base_gen 2 | test_base_hello 3 | test_base_lrpc 4 | test_base_thread 5 | test_hello 6 | test_kthread_attach 7 | test_kthread_wakeup 8 | test_many_threads 9 | test_multiple_runtimes 10 | test_ping 11 | test_runtime_smalloc 12 | test_runtime_threads 13 | test_runtime_mutexes 14 | test_runtime_rcu 15 | test_runtime_timer 16 | test_smalloc 17 | test_thread 18 | test_udp_echo 19 | test_storage 20 | test_storage_iops 21 | netperf 22 | -------------------------------------------------------------------------------- /tests/test_base_gen.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test_base_gen.c - tests generation numbers 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | static void test_gen(void) 10 | { 11 | uint32_t gen; 12 | struct gen_num gen_writer, gen_reader; 13 | 14 | /* init */ 15 | gen = 0; 16 | gen_init(&gen_writer, &gen); 17 | gen_init(&gen_reader, &gen); 18 | 19 | /* no gen -> gen */ 20 | gen_active(&gen_writer); 21 | BUG_ON(gen_in_same_gen(&gen_reader)); 22 | BUG_ON(!gen_in_same_gen(&gen_reader)); 23 | 24 | /* gen -> gen */ 25 | gen_active(&gen_writer); 26 | BUG_ON(!gen_in_same_gen(&gen_reader)); 27 | 28 | /* gen -> no gen -> gen */ 29 | gen_inactive(&gen_writer); 30 | gen_active(&gen_writer); 31 | BUG_ON(gen_in_same_gen(&gen_reader)); 32 | 33 | /* gen -> no gen */ 34 | gen_inactive(&gen_writer); 35 | BUG_ON(gen_in_same_gen(&gen_reader)); 36 | BUG_ON(gen_in_same_gen(&gen_reader)); 37 | 38 | /* no gen -> no gen */ 39 | gen_inactive(&gen_writer); 40 | BUG_ON(gen_in_same_gen(&gen_reader)); 41 | 42 | /* no gen -> gen -> no gen */ 43 | gen_active(&gen_writer); 44 | gen_inactive(&gen_writer); 45 | BUG_ON(gen_in_same_gen(&gen_reader)); 46 | 47 | log_debug("success"); 48 | } 49 | 50 | int main(int argc, char *argv[]) 51 
| { 52 | int ret; 53 | 54 | ret = base_init(); 55 | if (ret) { 56 | log_err("base_init() failed, ret = %d", ret); 57 | return 1; 58 | } 59 | BUG_ON(!base_init_done); 60 | 61 | test_gen(); 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /tests/test_base_hello.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test_base_hello.c - this test verifies that the base library can initialize 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | int main(int argc, char *argv[]) 10 | { 11 | int ret; 12 | 13 | ret = base_init(); 14 | if (ret) { 15 | log_err("base_init() failed, ret = %d", ret); 16 | return 1; 17 | } 18 | BUG_ON(!base_init_done); 19 | 20 | ret = base_init_thread(); 21 | if (ret) { 22 | log_err("base_init_thread() failed, ret = %d", ret); 23 | return 1; 24 | } 25 | BUG_ON(!perthread_read(thread_init_done)); 26 | 27 | log_info("hello world!"); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /tests/test_base_thread.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test_base_thread.c - tests base support for threads 3 | */ 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define PERTHREAD_VAL 10 14 | static DEFINE_PERTHREAD(int, blah); 15 | 16 | static int init_thread(void) 17 | { 18 | int ret; 19 | 20 | ret = base_init_thread(); 21 | if (ret) { 22 | log_err("base_init_thread() failed, ret = %d", ret); 23 | return 1; 24 | } 25 | BUG_ON(!perthread_read(thread_init_done)); 26 | BUG_ON(perthread_get(blah) != 0); 27 | 28 | perthread_get(blah) = PERTHREAD_VAL; 29 | BUG_ON(perthread_get(blah) != PERTHREAD_VAL); 30 | 31 | return ret; 32 | } 33 | 34 | static void *test_thread(void *data) 35 | { 36 | int ret; 37 | 38 | ret = init_thread(); 39 | BUG_ON(ret); 40 | log_info("hello thread %d", this_thread_id()); 41 | 
42 | return NULL; 43 | } 44 | 45 | int main(int argc, char *argv[]) 46 | { 47 | pthread_t tid[NCPU]; 48 | int ret, i; 49 | 50 | ret = base_init(); 51 | if (ret) { 52 | log_err("base_init() failed, ret = %d", ret); 53 | return 1; 54 | } 55 | BUG_ON(!base_init_done); 56 | BUG_ON(cpu_count < 1); 57 | 58 | init_thread(); 59 | 60 | for (i = 1; i < cpu_count; i++) { 61 | ret = pthread_create(&tid[i], NULL, test_thread, NULL); 62 | BUG_ON(ret); 63 | } 64 | 65 | for (i = 1; i < cpu_count; i++) { 66 | ret = pthread_join(tid[i], NULL); 67 | BUG_ON(ret); 68 | } 69 | 70 | log_info("joined all threads"); 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /tests/test_kthread_wakeup.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test_kthread_wakeup.c - tests waking of kthreads 3 | */ 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #define NTHREADS 6 15 | #define N 500000 16 | #define SPAWN_LIMIT 5 17 | 18 | static atomic_t n_threads; 19 | static atomic_t n_spawned; 20 | static waitgroup_t wg; 21 | 22 | static void work_handler(void *arg) 23 | { 24 | int i, ret, n_to_spawn; 25 | waitgroup_t *wg_parent = &wg; 26 | 27 | /* do some busy work */ 28 | delay_us(100); 29 | 30 | if (atomic_read(&n_threads) < NTHREADS) { 31 | /* we have too few threads, spawn more */ 32 | n_to_spawn = rand() % SPAWN_LIMIT; 33 | 34 | if (atomic_dec_and_test(&n_threads) && n_to_spawn == 0) 35 | n_to_spawn = 1; 36 | 37 | for (i = 0; i < n_to_spawn; i++) { 38 | if (atomic_add_and_fetch(&n_spawned, 1) <= N) { 39 | atomic_inc(&n_threads); 40 | ret = thread_spawn(work_handler, NULL); 41 | BUG_ON(ret); 42 | } 43 | } 44 | } else { 45 | /* don't spawn any more */ 46 | atomic_dec(&n_threads); 47 | } 48 | waitgroup_done(wg_parent); 49 | } 50 | 51 | static void main_handler(void *arg) 52 | { 53 | int i, ret; 54 | 55 | log_info("started main_handler() 
thread"); 56 | 57 | atomic_write(&n_threads, 0); 58 | atomic_write(&n_spawned, 0); 59 | waitgroup_init(&wg); 60 | waitgroup_add(&wg, N); 61 | for (i = 0; i < NTHREADS; i++) { 62 | atomic_inc(&n_spawned); 63 | atomic_inc(&n_threads); 64 | ret = thread_spawn(work_handler, NULL); 65 | BUG_ON(ret); 66 | } 67 | 68 | waitgroup_wait(&wg); 69 | log_info("ran %d threads", N); 70 | } 71 | 72 | int main(int argc, char *argv[]) 73 | { 74 | int ret; 75 | 76 | if (argc < 2) { 77 | printf("arg must be config file\n"); 78 | return -EINVAL; 79 | } 80 | 81 | ret = runtime_init(argv[1], main_handler, NULL); 82 | if (ret) { 83 | printf("failed to start runtime\n"); 84 | return ret; 85 | } 86 | 87 | return 0; 88 | } 89 | -------------------------------------------------------------------------------- /tests/test_many_threads.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define N 50000 10 | #define NCORES 4 11 | 12 | static void work_handler(void *arg) 13 | { 14 | waitgroup_t *wg_parent = (waitgroup_t *)arg; 15 | waitgroup_done(wg_parent); 16 | waitgroup_wait(wg_parent); 17 | } 18 | 19 | static void main_handler(void *arg) 20 | { 21 | waitgroup_t wg; 22 | double threads_per_second; 23 | uint64_t start_us; 24 | int i, ret; 25 | 26 | log_info("started main_handler() thread"); 27 | 28 | waitgroup_init(&wg); 29 | waitgroup_add(&wg, N); 30 | start_us = microtime(); 31 | for (i = 0; i < N; i++) { 32 | ret = thread_spawn(work_handler, &wg); 33 | BUG_ON(ret); 34 | thread_yield(); 35 | } 36 | 37 | waitgroup_wait(&wg); 38 | threads_per_second = (double)N / 39 | ((microtime() - start_us) * 0.000001); 40 | log_info("spawned %f threads / second", threads_per_second); 41 | } 42 | 43 | int main(int argc, char *argv[]) 44 | { 45 | int ret; 46 | 47 | if (argc < 2) { 48 | printf("arg must be config file\n"); 49 | return -EINVAL; 50 | } 51 | 52 | ret = runtime_init(argv[1], 
main_handler, NULL); 53 | if (ret) { 54 | printf("failed to start runtime"); 55 | return ret; 56 | } 57 | 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /tests/test_multiple_runtimes.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test_multiple_runtimes.c - tests initialization of multiple runtimes 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #define N_RUNTIMES 2 13 | #define SLEEP_S 5 14 | 15 | static void main_handler(void *arg) 16 | { 17 | int i; 18 | 19 | for (i = 0; i < SLEEP_S; i++) 20 | timer_sleep(1000*1000); 21 | 22 | log_info("exiting runtime"); 23 | } 24 | 25 | int main(int argc, char *argv[]) 26 | { 27 | int i, pid, ret; 28 | 29 | if (argc < 1 + N_RUNTIMES) { 30 | printf("arg must provide a config file for each runtime\n"); 31 | return -EINVAL; 32 | } 33 | 34 | for (i = 0; i < N_RUNTIMES; i++) { 35 | pid = fork(); 36 | BUG_ON(pid == -1); 37 | 38 | if (pid == 0) { 39 | ret = runtime_init(argv[1 + i], main_handler, NULL); 40 | BUG_ON(ret < 0); 41 | } 42 | 43 | sleep(1); 44 | } 45 | 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_ping.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test_ping.c - sends ping echo requests 3 | */ 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define N_PINGS 10 13 | #define DEST_IP_ADDR 3232235778 // 192.168.1.2 14 | 15 | static void main_handler(void *arg) 16 | { 17 | int i, ret; 18 | 19 | ret = net_ping_init(); 20 | if (ret) { 21 | log_err("failed to init ping"); 22 | return; 23 | } 24 | 25 | for (i = 0; i < N_PINGS; i++) { 26 | net_send_ping(i, DEST_IP_ADDR); 27 | 28 | /* wait 1 second before sending next ping */ 29 | timer_sleep(1000*1000); 30 | } 31 | } 32 | 33 | int main(int argc, char *argv[]) 34 | { 35 | int 
ret; 36 | 37 | if (argc < 2) { 38 | printf("arg must be config file\n"); 39 | return -EINVAL; 40 | } 41 | 42 | ret = runtime_init(argv[1], main_handler, NULL); 43 | if (ret) { 44 | printf("failed to start runtime\n"); 45 | return ret; 46 | } 47 | 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /tests/test_runtime_threads.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test_runtime_threads.c - tests basic thread spawning 3 | */ 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #define N 1000000 14 | #define NCORES 4 15 | 16 | static void leaf_handler(void *arg) 17 | { 18 | waitgroup_t *wg_parent = (waitgroup_t *)arg; 19 | delay_us(1); 20 | waitgroup_done(wg_parent); 21 | } 22 | 23 | static void work_handler(void *arg) 24 | { 25 | waitgroup_t *wg_parent = (waitgroup_t *)arg; 26 | waitgroup_t wg; 27 | int i, ret; 28 | 29 | waitgroup_init(&wg); 30 | waitgroup_add(&wg, N); 31 | for (i = 0; i < N; i++) { 32 | ret = thread_spawn(leaf_handler, &wg); 33 | BUG_ON(ret); 34 | thread_yield(); 35 | } 36 | 37 | waitgroup_wait(&wg); 38 | waitgroup_done(wg_parent); 39 | } 40 | 41 | static void main_handler(void *arg) 42 | { 43 | waitgroup_t wg; 44 | double threads_per_second; 45 | uint64_t start_us; 46 | int i, ret; 47 | 48 | log_info("started main_handler() thread"); 49 | log_info("creating threads with 1us of fake work."); 50 | 51 | waitgroup_init(&wg); 52 | waitgroup_add(&wg, NCORES); 53 | start_us = microtime(); 54 | for (i = 0; i < NCORES; i++) { 55 | ret = thread_spawn(work_handler, &wg); 56 | BUG_ON(ret); 57 | } 58 | 59 | waitgroup_wait(&wg); 60 | threads_per_second = (double)(NCORES * N) / 61 | ((microtime() - start_us) * 0.000001); 62 | log_info("spawned %f threads / second, efficiency %f", 63 | threads_per_second, threads_per_second / 1000000); 64 | } 65 | 66 | int main(int argc, char *argv[]) 67 | { 68 | int ret; 69 | 
70 | if (argc < 2) { 71 | printf("arg must be config file\n"); 72 | return -EINVAL; 73 | } 74 | 75 | ret = runtime_init(argv[1], main_handler, NULL); 76 | if (ret) { 77 | printf("failed to start runtime\n"); 78 | return ret; 79 | } 80 | 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /tests/test_runtime_timer.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test_runtime_timer.c - tests timer sleep throughput 3 | */ 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #define WORKERS 1000 15 | #define N 100000 16 | 17 | static void work_handler(void *arg) 18 | { 19 | waitgroup_t *wg_parent = (waitgroup_t *)arg; 20 | int i; 21 | 22 | for (i = 0; i < N; i++) 23 | timer_sleep(2); 24 | 25 | waitgroup_done(wg_parent); 26 | } 27 | 28 | static void main_handler(void *arg) 29 | { 30 | waitgroup_t wg; 31 | double timeouts_per_second; 32 | uint64_t start_us; 33 | int i, ret; 34 | 35 | log_info("started main_handler() thread"); 36 | 37 | waitgroup_init(&wg); 38 | waitgroup_add(&wg, WORKERS); 39 | start_us = microtime(); 40 | for (i = 0; i < WORKERS; i++) { 41 | ret = thread_spawn(work_handler, &wg); 42 | BUG_ON(ret); 43 | } 44 | 45 | waitgroup_wait(&wg); 46 | timeouts_per_second = (double)(WORKERS * N) / 47 | ((microtime() - start_us) * 0.000001); 48 | log_info("handled %f timeouts / second", timeouts_per_second); 49 | } 50 | 51 | int main(int argc, char *argv[]) 52 | { 53 | int ret; 54 | 55 | if (argc < 2) { 56 | printf("arg must be config file\n"); 57 | return -EINVAL; 58 | } 59 | 60 | ret = runtime_init(argv[1], main_handler, NULL); 61 | if (ret) { 62 | printf("failed to start runtime\n"); 63 | return ret; 64 | } 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /tests/test_storage.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * test_storage.c - writes and reads to the storage device 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | static void main_handler(void *arg) 15 | { 16 | int ret; 17 | uint32_t block_size; 18 | char *buf; 19 | 20 | block_size = storage_block_size(); 21 | log_info("num blocks: %lu", storage_num_blocks()); 22 | log_info("block size: %u", block_size); 23 | log_info("writing 'hello world' to device..."); 24 | if (block_size == 0) { 25 | log_info("storage support is disabled, skipping test"); 26 | return; 27 | } 28 | buf = malloc(block_size); 29 | BUG_ON(!buf); 30 | sprintf(buf, "hello world"); 31 | ret = storage_write(buf, 0, 1); 32 | if (ret) { 33 | log_err("failed to init storage"); 34 | return; 35 | } 36 | sprintf(buf, "cleared"); 37 | 38 | log_debug("reading from device..."); 39 | ret = storage_read(buf, 0, 1); 40 | if (ret) { 41 | log_err("failed to read"); 42 | } 43 | log_info("data read: %s", buf); 44 | free(buf); 45 | } 46 | 47 | int main(int argc, char *argv[]) 48 | { 49 | int ret; 50 | 51 | ret = runtime_init(argv[1], main_handler, NULL); 52 | if (ret) { 53 | log_err("failed to start runtime"); 54 | return ret; 55 | } 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /tests/test_storage_iops.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test_storage_iops.c - tests write IOPS for storage device using shenango runtime 3 | */ 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | #define WORKERS 100 19 | #define N 100000 20 | 21 | static void work_handler(void *arg) 22 | { 23 | static atomic_t thread_counter; 24 | waitgroup_t *wg_parent = (waitgroup_t *)arg; 25 | int i, tid; 26 | char *p; 27 | 
28 | 29 | p = malloc(4096); 30 | BUG_ON(!p); 31 | 32 | tid = atomic_fetch_and_add(&thread_counter, 1); 33 | 34 | for (i = 0; i < N; i++) 35 | BUG_ON(storage_write(p, 8 * (tid * N + i), 8)); 36 | 37 | waitgroup_done(wg_parent); 38 | } 39 | 40 | static void main_handler(void *arg) 41 | { 42 | waitgroup_t wg; 43 | double iops; 44 | uint64_t start_us; 45 | int i, ret; 46 | 47 | log_info("started main_handler() thread"); 48 | 49 | BUG_ON(8 * (N + 1) * WORKERS > storage_num_blocks()); 50 | 51 | waitgroup_init(&wg); 52 | waitgroup_add(&wg, WORKERS); 53 | start_us = microtime(); 54 | for (i = 0; i < WORKERS; i++) { 55 | ret = thread_spawn(work_handler, &wg); 56 | BUG_ON(ret); 57 | } 58 | 59 | waitgroup_wait(&wg); 60 | iops = (double)(WORKERS * N) / 61 | ((microtime() - start_us) * 0.000001); 62 | log_info("handled %f IOPS", iops); 63 | } 64 | 65 | int main(int argc, char *argv[]) 66 | { 67 | int ret; 68 | 69 | if (argc < 2) { 70 | printf("arg must be config file\n"); 71 | return -EINVAL; 72 | } 73 | 74 | ret = runtime_init(argv[1], main_handler, NULL); 75 | if (ret) { 76 | printf("failed to start runtime\n"); 77 | return ret; 78 | } 79 | 80 | return 0; 81 | } 82 | -------------------------------------------------------------------------------- /victim.config: -------------------------------------------------------------------------------- 1 | # an example runtime config file 2 | host_addr 192.168.1.8 3 | host_netmask 255.255.255.0 4 | host_gateway 192.168.1.1 5 | runtime_kthreads 10 6 | runtime_guaranteed_kthreads 10 7 | runtime_spinning_kthreads 0 8 | runtime_priority lc 9 | runtime_ht_punish_us 100 10 | runtime_qdelay_us 10 11 | --------------------------------------------------------------------------------