├── .gitignore
├── .gitmodules
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── antagonist.config
├── apps
    ├── bench
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── README.md
    │   ├── RpcManager.cc
    │   ├── RpcManager.h
    │   ├── callibrate.cc
    │   ├── efficiency.cc
    │   ├── efficiency_go.go
    │   ├── efficiency_linux.cc
    │   ├── fake_worker.cc
    │   ├── fake_worker.h
    │   ├── flash_client.cc
    │   ├── linux_mech_bench.cc
    │   ├── memcached_router.cc
    │   ├── netbench.cc
    │   ├── netbench2.cc
    │   ├── netbench_linux.cc
    │   ├── netbench_udp.cc
    │   ├── netperf.cc
    │   ├── proto.h
    │   ├── storage_bench.cc
    │   ├── stress.cc
    │   ├── stress.config
    │   ├── stress_linux.cpp
    │   ├── tbench.cc
    │   ├── tbench.config
    │   └── waking.config
    ├── dpdk_netperf
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── README.md
    │   └── dpdk_netperf.c
    ├── netbench
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── distribution.cc
    │   ├── distribution.h
    │   ├── format.sh
    │   ├── interference.cc
    │   ├── netbench.cc
    │   ├── stress.cc
    │   ├── stress_linux.cc
    │   ├── stress_shm.cc
    │   ├── stress_shm_query.cc
    │   ├── synthetic_worker.cc
    │   ├── synthetic_worker.h
    │   ├── util.cc
    │   └── util.h
    ├── spdk_perf
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── README.md
    │   ├── perf.c
    │   └── run_perf.sh
    ├── storage_service
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── reflex.h
    │   ├── snappy.sh
    │   └── storage_server.cc
    ├── stream
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── stream.cc
    │   └── stream_query.cc
    ├── streamcluster
    │   ├── COPYRIGHT
    │   ├── Makefile
    │   ├── parsec_barrier.cpp
    │   ├── parsec_barrier.hpp
    │   └── streamcluster.cpp
    └── synthetic
    │   ├── .gitignore
    │   ├── Cargo.toml
    │   ├── build.rs
    │   ├── rust-toolchain.toml
    │   └── src
    │       ├── backend.rs
    │       ├── distribution.rs
    │       ├── dns.rs
    │       ├── fakework.rs
    │       ├── http.rs
    │       ├── lockstep.rs
    │       ├── main.rs
    │       ├── memcached.rs
    │       ├── payload.rs
    │       └── reflex.rs
├── base
    ├── base.ld
    ├── bitmap.c
    ├── cpu.c
    ├── fd_transfer.c
    ├── init.c
    ├── init_internal.h
    ├── jenkins_hash.c
    ├── list.c
    ├── log.c
    ├── lrpc.c
    ├── mem.c
    ├── mempool.c
    ├── page.c
    ├── pci.c
    ├── signal.c
    ├── slab.c
    ├── stat.c
    ├── syscall.S
    ├── sysfs.c
    ├── tcache.c
    ├── thread.c
    └── time.c
├── bindings
    ├── cc
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── net.cc
    │   ├── net.h
    │   ├── runtime.cc
    │   ├── runtime.h
    │   ├── storage.h
    │   ├── sync.h
    │   ├── test.cc
    │   ├── thread.cc
    │   ├── thread.h
    │   └── timer.h
    └── rust
    │   ├── .gitignore
    │   ├── Cargo.lock
    │   ├── Cargo.toml
    │   ├── build.rs
    │   ├── rust-toolchain.toml
    │   ├── shenango.h
    │   └── src
    │       ├── asm.rs
    │       ├── ffi.rs
    │       ├── lib.rs
    │       ├── storage.rs
    │       ├── tcp.rs
    │       ├── test_hello.rs
    │       ├── test_runtime_joinhandle.rs
    │       ├── test_runtime_threads.rs
    │       ├── test_smalloc.rs
    │       ├── thread.rs
    │       └── udp.rs
├── breakwater
    ├── Makefile
    ├── README.md
    ├── apps
    │   └── netbench
    │   │   ├── Makefile
    │   │   ├── client.config
    │   │   ├── netbench.cc
    │   │   ├── server.config
    │   │   ├── synthetic_worker.cc
    │   │   ├── synthetic_worker.h
    │   │   ├── util.cc
    │   │   └── util.h
    ├── bindings
    │   └── cc
    │   │   ├── Makefile
    │   │   ├── inc
    │   │       └── breakwater
    │   │       │   └── rpc++.h
    │   │   └── rpc++.cc
    ├── inc
    │   └── breakwater
    │   │   ├── breakwater.h
    │   │   ├── dagor.h
    │   │   ├── nocontrol.h
    │   │   ├── rpc.h
    │   │   └── seda.h
    ├── scripts
    │   └── setup_machine.sh
    └── src
    │   ├── bw_client.c
    │   ├── bw_config.h
    │   ├── bw_proto.h
    │   ├── bw_server.c
    │   ├── dg_client.c
    │   ├── dg_config.h
    │   ├── dg_proto.h
    │   ├── dg_server.c
    │   ├── nc_client.c
    │   ├── nc_config.h
    │   ├── nc_proto.h
    │   ├── nc_server.c
    │   ├── sd_client.c
    │   ├── sd_config.h
    │   ├── sd_proto.h
    │   ├── sd_server.c
    │   ├── util.c
    │   └── util.h
├── build
    ├── config
    ├── init_submodules.sh
    ├── mlx4_22_03.patch
    ├── mlx5_22_03.patch
    ├── patches
    │   ├── dpdk
    │   │   ├── 0001-config-extend-max-memseg-lists.patch
    │   │   ├── 0002-i40e-disable-itr.patch
    │   │   └── 0003-ixgbe-performance-tuning.patch
    │   └── rdma-core
    │   │   ├── 0001-fast-runtime-flow-steering.patch
    │   │   ├── 0002-vfio-directpath-driver-support.patch
    │   │   ├── 0003-enable-fast-flow-steering-in-vfio-mode.patch
    │   │   ├── 0004-expose-object-id.patch
    │   │   └── 0005-increase-max-number-of-qps-cqs.patch
    ├── pcm.patch
    ├── shared.mk
    ├── spdk.patch
    └── spdk2.patch
├── client.config
├── inc
    ├── asm
    │   ├── atomic.h
    │   ├── chksum.h
    │   ├── cpu.h
    │   └── ops.h
    ├── base
    │   ├── assert.h
    │   ├── atomic.h
    │   ├── bitmap.h
    │   ├── byteorder.h
    │   ├── compiler.h
    │   ├── cpu.h
    │   ├── fd_transfer.h
    │   ├── gen.h
    │   ├── hash.h
    │   ├── init.h
    │   ├── kref.h
    │   ├── limits.h
    │   ├── list.h
    │   ├── lock.h
    │   ├── log.h
    │   ├── lrpc.h
    │   ├── mem.h
    │   ├── mempool.h
    │   ├── page.h
    │   ├── pci.h
    │   ├── signal.h
    │   ├── slab.h
    │   ├── stat.h
    │   ├── stddef.h
    │   ├── syscall.h
    │   ├── sysfs.h
    │   ├── tcache.h
    │   ├── thread.h
    │   ├── time.h
    │   ├── trapframe.h
    │   └── types.h
    ├── iokernel
    │   ├── control.h
    │   ├── directpath.h
    │   ├── queue.h
    │   └── shm.h
    ├── net
    │   ├── arp.h
    │   ├── chksum.h
    │   ├── ethernet.h
    │   ├── icmp.h
    │   ├── ip.h
    │   ├── mbuf.h
    │   ├── mbufq.h
    │   ├── ping.h
    │   ├── tcp.h
    │   └── udp.h
    └── runtime
    │   ├── gc.h
    │   ├── net.h
    │   ├── poll.h
    │   ├── preempt.h
    │   ├── rcu.h
    │   ├── rculist.h
    │   ├── runtime.h
    │   ├── smalloc.h
    │   ├── storage.h
    │   ├── sync.h
    │   ├── tcp.h
    │   ├── thread.h
    │   ├── timer.h
    │   └── udp.h
├── iokernel
    ├── commands.c
    ├── control.c
    ├── defs.h
    ├── directpath
    │   ├── arp_fwd.c
    │   ├── command.c
    │   ├── core.c
    │   ├── defs.h
    │   ├── events.c
    │   ├── mlx5_ifc.h
    │   ├── queues.c
    │   └── steering.c
    ├── dma.c
    ├── dp_clients.c
    ├── dpdk.c
    ├── hw_timestamp.c
    ├── hw_timestamp.h
    ├── ias.c
    ├── ias.h
    ├── ias_bw.c
    ├── ias_ht.c
    ├── ias_ts.c
    ├── ksched.c
    ├── ksched.h
    ├── main.c
    ├── mempool_completion.c
    ├── numa.c
    ├── pcm.h
    ├── pmc.h
    ├── ref.h
    ├── rx.c
    ├── sched.c
    ├── sched.h
    ├── simple.c
    ├── stat.c
    ├── timer_wheel.c
    └── tx.c
├── ksched
    ├── .gitignore
    ├── Kbuild
    ├── Makefile
    ├── defs.h
    ├── fake_idle.c
    ├── ksched.h
    ├── ksched_main.c
    ├── uintr.c
    ├── uintr.h
    └── uintr_hw.h
├── net
    └── netdump.c
├── runtime
    ├── cfg.c
    ├── defs.h
    ├── gc.c
    ├── init.c
    ├── ioqueues.c
    ├── kthread.c
    ├── net
    │   ├── arp.c
    │   ├── core.c
    │   ├── defs.h
    │   ├── directpath
    │   │   ├── common.c
    │   │   ├── defs.h
    │   │   └── mlx5
    │   │   │   ├── mlx5.h
    │   │   │   ├── mlx5_flow_steering.c
    │   │   │   ├── mlx5_ifc.h
    │   │   │   ├── mlx5_init_common.c
    │   │   │   ├── mlx5_init_external.c
    │   │   │   ├── mlx5_init_verbs.c
    │   │   │   ├── mlx5_queue_steering.c
    │   │   │   ├── mlx5_rx_stride.c
    │   │   │   └── mlx5_rxtx.c
    │   ├── icmp.c
    │   ├── ping.c
    │   ├── tcp.c
    │   ├── tcp.h
    │   ├── tcp_debug.c
    │   ├── tcp_in.c
    │   ├── tcp_out.c
    │   ├── transport.c
    │   ├── udp.c
    │   └── waitq.h
    ├── poll.c
    ├── preempt.c
    ├── rcu.c
    ├── sched.c
    ├── smalloc.c
    ├── softirq.c
    ├── stack.c
    ├── stat.c
    ├── storage.c
    ├── switch.S
    ├── sync.c
    ├── timer.c
    └── uintr.S
├── sample.config
├── scripts
    ├── count_loc.sh
    ├── cstate.c
    ├── rstat.go
    ├── set_irq_affinity
    ├── setup_machine.sh
    ├── setup_vfs.sh
    └── spin.cc
├── server.config
├── shim
    ├── Makefile
    ├── README
    ├── common.h
    ├── entry.c
    ├── mem.c
    ├── pthread.c
    ├── sem.c
    ├── sleep.c
    ├── sync.c
    └── tls.c
├── tests
    ├── .gitignore
    ├── netperf.c
    ├── test_base_gen.c
    ├── test_base_hello.c
    ├── test_base_lrpc.c
    ├── test_base_thread.c
    ├── test_kthread_attach.c
    ├── test_kthread_wakeup.c
    ├── test_many_threads.c
    ├── test_multiple_runtimes.c
    ├── test_ping.c
    ├── test_runtime_mutexes.c
    ├── test_runtime_rcu.c
    ├── test_runtime_smalloc.c
    ├── test_runtime_threads.c
    ├── test_runtime_timer.c
    ├── test_storage.c
    └── test_storage_iops.c
└── victim.config


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.d
 2 | *.o
 3 | *.a
 4 | [._]*.sw[a-p]
 5 | iokerneld
 6 | *~
 7 | .cproject
 8 | .project
 9 | ._*
10 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "dpdk"]
 2 | 	path = dpdk
 3 | 	url = https://github.com/DPDK/dpdk
 4 | [submodule "spdk"]
 5 | 	path = spdk
 6 | 	url = https://github.com/spdk/spdk
 7 | [submodule "rdma-core"]
 8 | 	path = rdma-core
 9 | 	url = https://github.com/linux-rdma/rdma-core
10 | [submodule "apps/storage_service/snappy"]
11 | 	path = apps/storage_service/snappy
12 | 	url = https://github.com/google/snappy.git
13 | [submodule "deps/pcm"]
14 | 	path = deps/pcm
15 | 	url = https://github.com/opcm/pcm.git
16 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | ## Code Overview
 2 | 
 3 | apps - synthetic and benchmarking applications.
 4 | 
 5 | base - an extension to the standard C library that provides tools for managing
 6 | lists, memory, bitmaps, initialization, atomics, and several other useful
 7 | features.
 8 | 
 9 | bindings - language bindings (C++ and rust) for the runtime.
10 | 
11 | dpdk - [DPDK](https://www.dpdk.org/) library for accessing NIC queues
12 | from userspace.
13 | 
14 | iokernel - dedicated core that steers packets and reallocates cores
15 | across applications.
16 | 
17 | net - a packet manipulation library.
18 | 
19 | runtime - a user-level threading and networking runtime.
20 | 
21 | shim - a shim layer that enables running unmodified
22 | [PARSEC](http://parsec.cs.princeton.edu/) applications atop Shenango.
23 | 
24 | 
25 | ## Coding Style
26 | 
27 | Use the following conventions for C code:
28 | https://www.kernel.org/doc/html/v4.10/process/coding-style.html
29 | 
30 | Use the following conventions for C++ code:
31 | https://google.github.io/styleguide/cppguide.html
32 | 
33 | For third party libraries and tools, use their existing coding style.
34 | 
35 | For some helpful tips on how to write clean code, see:
36 | https://www.lysator.liu.se/c/pikestyle.html
37 | 


--------------------------------------------------------------------------------
/antagonist.config:
--------------------------------------------------------------------------------
1 | # an example runtime config file
2 | host_addr 192.168.1.9
3 | host_netmask 255.255.255.0
4 | host_gateway 192.168.1.1
5 | runtime_kthreads 20
6 | runtime_guaranteed_kthreads 0
7 | runtime_priority be
8 | runtime_qdelay_us 10
9 | 


--------------------------------------------------------------------------------
/apps/bench/.gitignore:
--------------------------------------------------------------------------------
 1 | callibrate
 2 | efficiency
 3 | efficiency_linux
 4 | stress
 5 | stress_linux
 6 | tbench
 7 | netbench
 8 | netbench2
 9 | netbench_udp
10 | netbench_linux
11 | netperf
12 | linux_mech_bench
13 | memcached_router
14 | flash_client
15 | storage_bench
16 | 


--------------------------------------------------------------------------------
/apps/bench/README.md:
--------------------------------------------------------------------------------
 1 | # Threading Benchmarks
 2 | 
 3 | First build Shenango and then build the benchmarks in this directory
 4 | with `make clean && make`. Run the main Shenango threading benchmarks
 5 | as follows (benchmarks will use a single runtime core).
 6 | 
 7 | In the Shenango directory:
 8 | ```
 9 | sudo ./iokerneld
10 | ```
11 | 
12 | In this directory:
13 | ```
14 | ./tbench tbench.config
15 | ```


--------------------------------------------------------------------------------
/apps/bench/callibrate.cc:
--------------------------------------------------------------------------------
 1 | #include "runtime.h"
 2 | #include "fake_worker.h"
 3 | 
 4 | #include <memory>
 5 | #include <iostream>
 6 | #include <chrono>
 7 | 
 8 | namespace {
 9 | 
10 | using us = std::chrono::duration<double, std::micro>;
11 | constexpr int kMultiply = 100000;
12 | 
13 | void Measure(FakeWorker *w, double target) {  // prints an i for which one Work(i) call takes about `target` us
14 |   double elapsed;
15 |   uint64_t i = 1;
16 |   // Grow: double i until kMultiply calls of Work(i) take at least target * kMultiply us.
17 |   do {
18 |     i *= 2;
19 |     auto start = std::chrono::steady_clock::now();
20 |     for (int j = 0; j < kMultiply; j++) w->Work(i);
21 |     auto finish = std::chrono::steady_clock::now();
22 |     elapsed = std::chrono::duration_cast<us>(finish - start).count();
23 |   } while (elapsed < target * kMultiply);
24 |   // Shrink: lower i by one per batch until the batch no longer exceeds the target.
25 |   while (elapsed > target * kMultiply) {
26 |     --i;
27 |     auto start = std::chrono::steady_clock::now();
28 |     for (int j = 0; j < kMultiply; j++) w->Work(i);
29 |     auto finish = std::chrono::steady_clock::now();
30 |     elapsed = std::chrono::duration_cast<us>(finish - start).count();
31 |   }
32 |   // Report the final i and the mean time per call from the last batch.
33 |   std::cout << i << " iterations took " << elapsed / kMultiply << " us."
34 |             << std::endl;
35 | }
36 | 
37 | } // anonymous namespace
38 | 
39 | int main(int argc, char *argv[]) {  // argv[1]: target microseconds, argv[2]: worker spec
40 |   if (argc != 3) {
41 |     std::cerr << "usage: [microseconds (double)] [worker_spec]" << std::endl;
42 |     return 1;
43 |   }
44 |   // A null result from the factory is reported as an invalid worker spec.
45 |   FakeWorker *w = FakeWorkerFactory(argv[2]);
46 |   if (!w) {
47 |     std::cerr << "Invalid worker argument." << std::endl;
48 |     return 1;
49 |   }
50 |   Measure(w, std::stod(argv[1], nullptr));  // std::stod throws if argv[1] does not parse as a number
51 |   // NOTE(review): w is never deleted; the process exits right after this point.
52 |   return 0;
53 | }
54 | 


--------------------------------------------------------------------------------
/apps/bench/fake_worker.h:
--------------------------------------------------------------------------------
 1 | // fake_worker.h - support for carefully controlled fake work generation
 2 | 
 3 | #pragma once
 4 | 
 5 | #include <cstddef>
 6 | #include <cstdint>
 7 | #include <vector>
 8 | #include <string>
 9 | 
10 | class FakeWorker {  // abstract interface implemented by every fake-work generator in this header
11 |  public:
12 |   virtual ~FakeWorker() {}  // workers are handed out as FakeWorker*; keeps delete through the base well-defined
13 |   virtual void Work(uint64_t n) = 0;  // Perform n iterations of fake work.
14 | };
15 | 
16 | class SqrtWorker : public FakeWorker {  // worker with no data members
17 |  public:
18 |   SqrtWorker() {}
19 |   ~SqrtWorker() {}
20 |   // Work() is declared here only; its body is not part of this header.
21 |   // Performs n iterations of sqrt().
22 |   void Work(uint64_t n); 
23 | };
24 | 
25 | class StridedMemtouchWorker : public FakeWorker {  // worker over a buffer with a size and a stride
26 |  public:
27 |   ~StridedMemtouchWorker() {delete buf_;}  // NOTE(review): scalar delete -- needs delete[] if Create() allocates with new[]; confirm
28 |   // The constructor is private, so Create() is the way to obtain an instance.
29 |   // Creates a strided memory touching worker.
30 |   static StridedMemtouchWorker *Create(std::size_t size, size_t stride);
31 | 
32 |   // Performs n strided memory touches.
33 |   void Work(uint64_t n);
34 | 
35 |  private:
36 |   StridedMemtouchWorker(char *buf, std::size_t size, size_t stride) :
37 | 	buf_(buf), size_(size), stride_(stride) { }
38 | 
39 |   volatile char *buf_;  // released by the destructor
40 |   std::size_t size_;
41 |   std::size_t stride_;
42 | };
43 | 
44 | class MemStreamWorker : public FakeWorker {  // worker over a buffer and its size
45 |  public:
46 |   ~MemStreamWorker();  // declared here; defined outside this header
47 |   // The constructor is private, so Create() is the way to obtain an instance.
48 |   // Creates a memory streaming worker.
49 |   static MemStreamWorker *Create(std::size_t size);
50 | 
51 |   // Performs n memory reads.
52 |   void Work(uint64_t n);
53 | 
54 |  private:
55 |   MemStreamWorker(char *buf, std::size_t size) :
56 |   buf_(buf), size_(size) { }
57 | 
58 |   volatile char *buf_;  // NOTE(review): ownership is decided by the out-of-line destructor -- confirm there
59 |   std::size_t size_;
60 | };
61 | 
62 | class RandomMemtouchWorker : public FakeWorker {  // worker over a buffer plus a schedule vector
63 |  public:
64 |   ~RandomMemtouchWorker() {delete buf_;}  // NOTE(review): scalar delete -- needs delete[] if Create() allocates with new[]; confirm
65 |   // The constructor is private, so Create() is the way to obtain an instance.
66 |   // Creates a random memory touching worker.
67 |   static RandomMemtouchWorker *Create(std::size_t size, unsigned int seed);
68 | 
69 |   // Performs n random memory touches.
70 |   void Work(uint64_t n);
71 | 
72 |  private:
73 |   RandomMemtouchWorker(char *buf, std::vector<unsigned int> schedule) :
74 | 	buf_(buf), schedule_(std::move(schedule)) { }  // schedule is taken by value and moved into the member
75 | 
76 |   volatile char *buf_;  // released by the destructor
77 |   std::vector<unsigned int> schedule_;
78 | };
79 | 
80 | // Parses a string to generate one of the above fake workers.
81 | FakeWorker *FakeWorkerFactory(std::string s);
82 | 


--------------------------------------------------------------------------------
/apps/bench/proto.h:
--------------------------------------------------------------------------------
 1 | // a really basic encoding for experiment messages
 2 | 
 3 | #pragma once
 4 | 
 5 | // The netbench server responds to this port.
 6 | constexpr uint64_t kNetbenchPort = 8001;
 7 | 
 8 | constexpr uint32_t kMagic = 0x6e626368; // 'nbch'
 9 | constexpr uint32_t kKill = 0x6b696c6c; // 'kill'
10 | 
11 | struct nbench_req {  // fixed-size request: a magic word and a port count
12 |   uint32_t magic;  // presumably kMagic or kKill -- confirm against the sender
13 |   int nports;
14 | };
15 | 
16 | struct nbench_resp {  // response header followed by a variable-length port list
17 |   uint32_t magic;
18 |   int nports;  // presumably the number of entries in ports[] -- confirm against the server
19 |   uint16_t ports[];  // flexible array member (a compiler extension in C++); storage follows the header
20 | };
21 | 
22 | struct payload {  // per-message header followed by variable-length padding
23 |   uint32_t tag;
24 |   uint64_t idx;  // NOTE(review): struct is not packed, so the compiler may pad between tag and idx
25 |   double workn;
26 |   char pad[];  // flexible array member; its length is not recorded in this struct
27 | };
28 | 


--------------------------------------------------------------------------------
/apps/bench/stress.cc:
--------------------------------------------------------------------------------
 1 | extern "C" {
 2 | #include <base/log.h>
 3 | #undef min
 4 | #undef max
 5 | }
 6 | 
 7 | #include "runtime.h"
 8 | #include "thread.h"
 9 | #include "sync.h"
10 | #include "timer.h"
11 | #include "fake_worker.h"
12 | 
13 | #include <iostream>
14 | #include <chrono>
15 | 
16 | namespace {
17 | 
18 | int threads;
19 | uint64_t n;
20 | std::string worker_spec;
21 | 
22 | void MainHandler(void *arg) {  // entry point passed to runtime_init(); arg is unused
23 |   rt::WaitGroup wg(1);
24 |   std::vector<uint64_t> cnt(threads);
25 |   // Spawn `threads` workers; worker i repeats Work(n), bumps cnt[i], then yields.
26 |   for (int i = 0; i < threads; ++i) {
27 |     rt::Spawn([&,i](){
28 |       auto *w = FakeWorkerFactory(worker_spec);
29 |       if (w == nullptr) {
30 |         std::cerr << "Failed to create worker." << std::endl;
31 |         exit(1);
32 |       }
33 | 
34 |       while (true) {
35 |         w->Work(n);
36 |         cnt[i]++;
37 |         rt::Yield();
38 |       }
39 |     });
40 |   }
41 |   // Reporter: after each rt::Sleep(rt::kSeconds), logs the growth of the summed counters per elapsed second.
42 |   rt::Spawn([&](){
43 |     uint64_t last_total = 0;
44 |     auto last = std::chrono::steady_clock::now();
45 |     while (1) {
46 |       rt::Sleep(rt::kSeconds);
47 |       auto now = std::chrono::steady_clock::now();
48 |       uint64_t total = 0;
49 |       double duration = std::chrono::duration_cast<
50 |         std::chrono::duration<double>>(now - last).count();
51 |       for (int i = 0; i < threads; i++) total += cnt[i];
52 |       log_info("%f", static_cast<double>(total - last_total) / duration);
53 |       last_total = total;
54 |       last = now;
55 |     }
56 |   });
57 |   // Nothing in this function signals wg, which was constructed with a count of 1.
58 |   // never returns
59 |   wg.Wait();
60 | }
61 | 
62 | } // anonymous namespace
63 | 
64 | int main(int argc, char *argv[]) {  // argv: config_file, #threads, #n, worker_spec
65 |   int ret;
66 | 
67 |   if (argc != 5) {
68 |     std::cerr << "usage: [config_file] [#threads] [#n] [worker_spec]"
69 |               << std::endl;
70 |     return -EINVAL;
71 |   }
72 |   // Base 0 lets std::stoi/std::stoul accept decimal, 0x-prefixed hex, or 0-prefixed octal.
73 |   threads = std::stoi(argv[2], nullptr, 0);
74 |   n = std::stoul(argv[3], nullptr, 0);
75 |   worker_spec = std::string(argv[4]);
76 |   // Start the runtime with MainHandler as its entry point; a nonzero result is reported as a failed start.
77 |   ret = runtime_init(argv[1], MainHandler, NULL);
78 |   if (ret) {
79 |     printf("failed to start runtime\n");
80 |     return ret;
81 |   }
82 | 
83 |   return 0;
84 | }
85 | 


--------------------------------------------------------------------------------
/apps/bench/stress.config:
--------------------------------------------------------------------------------
1 | # an example runtime config file for filling all unused cores
2 | # (assuming 24 total available) with batch work
3 | host_addr 192.168.1.111
4 | host_netmask 255.255.255.0
5 | host_gateway 192.168.1.1
6 | runtime_kthreads 22
7 | 


--------------------------------------------------------------------------------
/apps/bench/stress_linux.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "fake_worker.h"
 3 | 
 4 | #include <iostream>
 5 | #include <chrono>
 6 | #include <thread>
 7 | 
 8 | namespace {
 9 | 
10 | int threads;
11 | uint64_t n;
12 | std::string worker_spec;
13 | 
14 | void MainHandler(void *arg) {  // called directly from main(); arg is unused
15 |   std::vector<uint64_t> cnt(threads);
16 |   // Detached worker i repeats Work(n) and bumps cnt[i]; cnt is captured by reference.
17 |   for (int i = 0; i < threads; ++i) {
18 |     std::thread([i, &cnt](){
19 |       auto *w = FakeWorkerFactory(worker_spec);
20 |       if (w == nullptr) {
21 |         std::cerr << "Failed to create worker." << std::endl;
22 |         exit(1);
23 |       }
24 | 
25 |       while (true) {
26 |         w->Work(n);
27 |         cnt[i]++;
28 |       }
29 |     }).detach();
30 |   }
31 |   // Reporter: every second prints the growth of the summed counters per elapsed second to stderr.
32 |   std::thread([&](){
33 |     uint64_t last_total = 0;
34 |     auto last = std::chrono::steady_clock::now();
35 |     while (1) {
36 |       std::chrono::seconds sec(1);
37 |       std::this_thread::sleep_for(sec);
38 |       auto now = std::chrono::steady_clock::now();
39 |       uint64_t total = 0;
40 |       double duration = std::chrono::duration_cast<
41 |         std::chrono::duration<double>>(now - last).count();
42 |       for (int i = 0; i < threads; i++) total += cnt[i];  // NOTE(review): plain uint64_t read while workers write, no synchronization
43 |       std::cerr << static_cast<double>(total - last_total) / duration
44 |                 << std::endl;
45 |       last_total = total;
46 |       last = now;
47 |     }
48 |   }).join();
49 |   // join() waits on the endless reporter loop, so cnt stays in scope for the workers.
50 |   // never returns
51 | }
52 | 
53 | } // anonymous namespace
54 | 
55 | int main(int argc, char *argv[]) {  // argv: #threads, #n, worker_spec
56 | 
57 |   if (argc != 4) {
58 |     std::cerr << "usage: [#threads] [#n] [worker_spec]"
59 |               << std::endl;
60 |     return -EINVAL;
61 |   }
62 |   // Base 0 lets std::stoi/std::stoul accept decimal, 0x-prefixed hex, or 0-prefixed octal.
63 |   threads = std::stoi(argv[1], nullptr, 0);
64 |   n = std::stoul(argv[2], nullptr, 0);
65 |   worker_spec = std::string(argv[3]);
66 |   // Runs on the calling thread; MainHandler blocks in join() on its reporter thread.
67 |   MainHandler(NULL);
68 | 
69 |   return 0;
70 | }
71 | 


--------------------------------------------------------------------------------
/apps/bench/tbench.config:
--------------------------------------------------------------------------------
1 | # an example runtime config file
2 | host_addr 192.168.1.2
3 | host_netmask 255.255.255.0
4 | host_gateway 192.168.1.1
5 | runtime_kthreads 1
6 | runtime_guaranteed_kthreads 1
7 | runtime_spinning_kthreads 1
8 | disable_watchdog 1


--------------------------------------------------------------------------------
/apps/bench/waking.config:
--------------------------------------------------------------------------------
1 | # an example runtime config file
2 | host_addr 192.168.1.2
3 | host_netmask 255.255.255.0
4 | host_gateway 192.168.1.1
5 | runtime_kthreads 1
6 | runtime_guaranteed_kthreads 1
7 | 


--------------------------------------------------------------------------------
/apps/dpdk_netperf/.gitignore:
--------------------------------------------------------------------------------
1 | build


--------------------------------------------------------------------------------
/apps/dpdk_netperf/README.md:
--------------------------------------------------------------------------------
 1 | # Latency Benchmarks
 2 | 
 3 | First build DPDK (without driver modifications), then build
 4 | dpdk_netperf in this directory with `make clean && make`.
 5 | 
 6 | ## DPDK only
 7 | To run the benchmark with pure DPDK on both machines:
 8 | 
 9 | On the server (IP 192.168.1.2):
10 | ```
11 | sudo ./build/dpdk_netperf -l2 --socket-mem=128 -- UDP_SERVER 192.168.1.2
12 | ```
13 | 
14 | On the client (IP 192.168.1.3):
15 | ```
16 | sudo ./build/dpdk_netperf -l2 --socket-mem=128 -- UDP_CLIENT 192.168.1.3 192.168.1.2 50000 8001 10 8 50
17 | ```
18 | 
19 | ## Shenango spinning (IOKernel + runtime)
20 | 
21 | To run Shenango with the server runtime thread spinning, start the
22 | IOKernel and then in `shenango/apps/bench`:
23 | 
24 | ```
25 | ./netbench_udp tbench.config server
26 | ```
27 | Then run the client as above.
28 | 
29 | ## Shenango waking (IOKernel + runtime + wakeup)
30 | 
31 | To run with Shenango in its default mode but no batch work, start the
32 | IOKernel and then in `shenango/apps/bench`:
33 | ```
34 | ./netbench_udp waking.config server
35 | ```
36 | Then run the client as above.
37 | 
38 | ## Shenango preempting (IOKernel + runtime + wakeup + preemption)
39 | 
40 | To run Shenango with a batch application running concurrently, start
41 | the IOKernel and then in `shenango/apps/bench`:
42 | ```
43 | ./stress stress.config 100 100 sqrt
44 | ./netbench_udp waking.config server
45 | ```
46 | 
47 | Then run the client as above. If your server does not have 24
48 | hyperthreads, you will need to adjust `runtime_kthreads` in
49 | stress.config to be 2 fewer than the number of hyperthreads on your
50 | server.


--------------------------------------------------------------------------------
/apps/netbench/.gitignore:
--------------------------------------------------------------------------------
1 | stress
2 | netbench
3 | interference
4 | stress_linux
5 | stress_shm
6 | stress_shm_query
7 | 


--------------------------------------------------------------------------------
/apps/netbench/distribution.cc:
--------------------------------------------------------------------------------
 1 | // distribution.cc - support for generating random distributions
 2 | 
 3 | #include "distribution.h"
 4 | #include "util.h"
 5 | 
 6 | #include <string>
 7 | 
 8 | Distribution *DistributionFactory(std::string s) {  // builds a Distribution from a ':'-separated spec, or returns nullptr
 9 |   std::vector<std::string> tokens = split(s, ':');
10 |   // Accepted specs: fixed:<val>, exponential:<mean>, bimodal:<low>:<high>:<frac>.
11 |   // the first token is the type of distribution, must be specified
12 |   auto cnt = tokens.size();
13 |   if (cnt < 1) return nullptr;
14 |   // rand() supplies the seed argument of the randomized distributions.
15 |   if (tokens[0] == "fixed" && cnt == 2) {
16 |     double val = std::stod(tokens[1], nullptr);
17 |     return new FixedDistribution(val);
18 |   } else if (tokens[0] == "exponential" && cnt == 2) {
19 |     double val = std::stod(tokens[1], nullptr);
20 |     return new ExponentialDistribution(rand(), val);
21 |   } else if (tokens[0] == "bimodal" && cnt == 4) {
22 |     double low = std::stod(tokens[1], nullptr);
23 |     double high = std::stod(tokens[2], nullptr);
24 |     double frac = std::stod(tokens[3], nullptr);
25 |     return new BimodalDistribution(rand(), low, high, frac);
26 |   }
27 | 
28 |   // unknown distribution type or wrong number of arguments
29 |   return nullptr;
30 | }
31 | 


--------------------------------------------------------------------------------
/apps/netbench/distribution.h:
--------------------------------------------------------------------------------
 1 | // distribution.h - support for generating random distributions
 2 | 
 3 | #pragma once
 4 | 
 5 | #include <random>
 6 | 
 7 | class Distribution {  // abstract interface for the sample generators below
 8 |  public:
 9 |   virtual ~Distribution() {}
10 |   // Both members below are pure virtual; concrete distributions implement them.
11 |   // Generate the next sample.
12 |   virtual double operator()() = 0;
13 |   virtual double Mean() const = 0;  // mean as reported by the concrete distribution
14 | };
15 | 
16 | class FixedDistribution : public Distribution {  // every sample is the constructor's val
17 |  public:
18 |   FixedDistribution(double val) : val_(val) {}
19 |   ~FixedDistribution() {}
20 |   // Sampling and Mean() both return val_ unchanged.
21 |   double operator()() { return val_; }
22 |   double Mean() const { return val_; }
23 | 
24 |  private:
25 |   const double val_;
26 | };
27 | 
28 | class BimodalDistribution : public Distribution {  // two-valued distribution: each sample is low_ or high_
29 |  public:
30 |   BimodalDistribution(int seed, double low, double high, double fraction)
31 |       : frac_(fraction), low_(low), high_(high), rand_(seed), dist_(0.0, 1.0) {}
32 |   // A draw from dist_ (constructed with 0.0, 1.0) above frac_ yields high_, otherwise low_.
33 |   double operator()() { return dist_(rand_) > frac_ ? high_ : low_; }
34 |   double Mean() const { return high_ * (1 - frac_) + low_ * frac_; }
35 | 
36 |  private:
37 |   const double frac_;  // weight given to low_ in Mean()
38 |   const double low_;
39 |   const double high_;
40 |   std::mt19937 rand_;  // seeded from the constructor's seed
41 |   std::uniform_real_distribution<double> dist_;
42 | };
43 | 
44 | class ExponentialDistribution : public Distribution {  // exponential samples with rate 1 / mean
45 |  public:
46 |   ExponentialDistribution(int seed, double mean)
47 |       : mean_(mean), rand_(seed), dist_(1.0f / mean) {}
48 |   ~ExponentialDistribution() {}
49 |   // Samples are drawn from dist_ using rand_; Mean() returns the constructor's mean.
50 |   double operator()() { return dist_(rand_); }
51 |   double Mean() const { return mean_; }
52 | 
53 |  private:
54 |   const double mean_;
55 |   std::mt19937 rand_;  // seeded from the constructor's seed
56 |   std::exponential_distribution<double> dist_;
57 | };
58 | 
59 | // Parses a string to generate one of the above distributions.
60 | Distribution *DistributionFactory(std::string s);
61 | 


--------------------------------------------------------------------------------
/apps/netbench/format.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # formats all source files
3 | clang-format -i -style=google *.cc *.h
4 | 


--------------------------------------------------------------------------------
/apps/netbench/stress.cc:
--------------------------------------------------------------------------------
 1 | extern "C" {
 2 | #include <base/log.h>
 3 | }
 4 | 
 5 | #include "runtime.h"
 6 | #include "sync.h"
 7 | #include "synthetic_worker.h"
 8 | #include "thread.h"
 9 | #include "timer.h"
10 | 
11 | #include <chrono>
12 | #include <iostream>
13 | 
14 | barrier_t barrier;
15 | 
16 | bool synth_barrier_wait() { return barrier_wait(&barrier); }  // calls barrier_wait() on the file-level barrier and returns its result
17 | 
18 | namespace {
19 | 
20 | int threads;
21 | uint64_t n;
22 | std::string worker_spec;
23 | 
24 | void MainHandler(void *arg) {  // entry point passed to runtime_init(); arg is unused
25 |   rt::WaitGroup wg(1);
26 |   std::vector<uint64_t> cnt(threads);
27 |   // barrier_init() receives the worker count before any worker is spawned.
28 |   barrier_init(&barrier, threads);
29 |   // Spawn `threads` workers; worker i repeats Work(n), bumps cnt[i], then yields.
30 |   for (int i = 0; i < threads; ++i) {
31 |     rt::Spawn([&, i]() {
32 |       auto *w = SyntheticWorkerFactory(worker_spec);
33 |       if (w == nullptr) {
34 |         std::cerr << "Failed to create worker." << std::endl;
35 |         exit(1);
36 |       }
37 | 
38 |       while (true) {
39 |         w->Work(n);
40 |         cnt[i]++;
41 |         rt::Yield();
42 |       }
43 |     });
44 |   }
45 |   // Reporter: after each rt::Sleep(rt::kSeconds), logs the growth of the summed counters per elapsed second.
46 |   rt::Spawn([&]() {
47 |     uint64_t last_total = 0;
48 |     auto last = std::chrono::steady_clock::now();
49 |     while (1) {
50 |       rt::Sleep(rt::kSeconds);
51 |       auto now = std::chrono::steady_clock::now();
52 |       uint64_t total = 0;
53 |       double duration =
54 |           std::chrono::duration_cast<std::chrono::duration<double>>(now - last)
55 |               .count();
56 |       for (int i = 0; i < threads; i++) total += cnt[i];
57 |       preempt_disable();  // preemption is disabled only around the log call
58 |       log_info("%f", static_cast<double>(total - last_total) / duration);
59 |       preempt_enable();
60 |       last_total = total;
61 |       last = now;
62 |     }
63 |   });
64 |   // Nothing in this function signals wg, which was constructed with a count of 1.
65 |   // never returns
66 |   wg.Wait();
67 | }
68 | 
69 | }  // anonymous namespace
70 | 
71 | int main(int argc, char *argv[]) {  // argv: config_file, #threads, #n, worker_spec
72 |   int ret;
73 | 
74 |   if (argc != 5) {
75 |     std::cerr << "usage: [config_file] [#threads] [#n] [worker_spec]"
76 |               << std::endl;
77 |     return -EINVAL;
78 |   }
79 |   // Base 0 lets std::stoi/std::stoul accept decimal, 0x-prefixed hex, or 0-prefixed octal.
80 |   threads = std::stoi(argv[2], nullptr, 0);
81 |   n = std::stoul(argv[3], nullptr, 0);
82 |   worker_spec = std::string(argv[4]);
83 |   // Start the runtime with MainHandler as its entry point; a nonzero result is reported as a failed start.
84 |   ret = runtime_init(argv[1], MainHandler, NULL);
85 |   if (ret) {
86 |     printf("failed to start runtime\n");
87 |     return ret;
88 |   }
89 | 
90 |   return 0;
91 | }
92 | 


--------------------------------------------------------------------------------
/apps/netbench/stress_shm.cc:
--------------------------------------------------------------------------------
 1 | extern "C" {
 2 | #include <base/log.h>
 3 | }
 4 | 
 5 | #include "runtime.h"
 6 | #include "sync.h"
 7 | #include "synthetic_worker.h"
 8 | #include "thread.h"
 9 | #include "timer.h"
10 | 
11 | #include <sys/shm.h>
12 | #include <unistd.h>
13 | #include <chrono>
14 | #include <iostream>
15 | 
16 | #define SHM_KEY (0x123)
17 | 
18 | barrier_t barrier;
19 | bool use_barrier = false;
20 | bool synth_barrier_wait() { return barrier_wait(&barrier); }  // waits on the global `barrier`; forwards barrier_wait()'s result
21 | namespace {
22 | 
23 | int threads;
24 | uint64_t n;
25 | std::string worker_spec;
26 | 
27 | // Publishes per-worker Work() counts in SysV shared memory; never returns.
28 | void MainHandler(void *arg) {
29 |   // One counter per worker, spaced 8 uint64_t slots (64 bytes) apart.
30 |   const size_t len = sizeof(uint64_t) * threads * 8;
31 |   int shmid = shmget((key_t)SHM_KEY, len, 0777 | IPC_CREAT);
32 |   void *shm = shmid < 0 ? (void *)-1 : shmat(shmid, 0, 0);
33 |   if (shm == (void *)-1) {  // shmget() or shmat() failed
34 |     std::cerr << "Failed to map shared memory segment." << std::endl;
35 |     exit(1);
36 |   }
37 |   uint64_t *cnt = (uint64_t *)shm;
38 |   rt::WaitGroup wg(1);
39 |   barrier_init(&barrier, threads);
40 | 
41 |   for (int i = 0; i < threads; ++i) {
42 |     rt::Spawn([&, i]() {
43 |       auto *w = SyntheticWorkerFactory(worker_spec);
44 |       if (w == nullptr) {
45 |         std::cerr << "Failed to create worker." << std::endl;
46 |         exit(1);
47 |       }
48 |       while (true) {
49 |         w->Work(n);
50 |         cnt[i * 8]++;
51 |         if (use_barrier)
52 |           synth_barrier_wait();
53 |         else
54 |           rt::Yield();
55 |       }
56 |     });
57 |   }
58 |   wg.Wait();  // never returns
59 | }
60 | 
61 | }  // anonymous namespace
62 | 
63 | void PrintUsage() {  // prints the expected arguments to stderr
64 |   static const char kUsage[] =
65 |       "usage: [config_file] [#threads] [#n] [worker_spec] <use_barrier>";
66 |   std::cerr << kUsage << std::endl;
67 | }
68 | 
69 | // Parses the arguments (see PrintUsage) and starts the runtime.
70 | int main(int argc, char *argv[]) {
71 |   if (argc < 5) {
72 |     PrintUsage();
73 |     return -EINVAL;
74 |   }
75 |   threads = std::stoi(argv[2], nullptr, 0);
76 |   // Non-positive counts give MainHandler an invalid shared-segment size.
77 |   if (threads <= 0) {
78 |     std::cerr << "#threads must be a positive integer" << std::endl;
79 |     return -EINVAL;
80 |   }
81 |   n = std::stoul(argv[3], nullptr, 0);
82 |   worker_spec = std::string(argv[4]);
83 | 
84 |   // The only accepted optional argument is the literal "use_barrier".
85 |   if (argc > 5) {
86 |     if (std::string(argv[5]) != "use_barrier") {
87 |       PrintUsage();
88 |       return -EINVAL;
89 |     }
90 |     use_barrier = true;
91 |   }
92 | 
93 |   int ret = runtime_init(argv[1], MainHandler, NULL);
94 |   if (ret) printf("failed to start runtime\n");
95 |   return ret;
96 | }
97 | 


--------------------------------------------------------------------------------
/apps/netbench/util.cc:
--------------------------------------------------------------------------------
 1 | #include "util.h"
 2 | 
 3 | // Splits text at every sep; always yields at least one (maybe empty) token.
 4 | std::vector<std::string> split(const std::string &text, char sep) {
 5 |   std::vector<std::string> parts;
 6 |   std::string::size_type from = 0;
 7 |   for (auto hit = text.find(sep); hit != std::string::npos;
 8 |        from = hit + 1, hit = text.find(sep, from))
 9 |     parts.emplace_back(text.substr(from, hit - from));
10 |   parts.emplace_back(text.substr(from));
11 |   return parts;
12 | }
13 | 


--------------------------------------------------------------------------------
/apps/netbench/util.h:
--------------------------------------------------------------------------------
 1 | // util.h - a collection of shared utilities
 2 | 
 3 | #pragma once
 4 | 
 5 | #include "timer.h"
 6 | 
 7 | #include <chrono>
 8 | #include <string>
 9 | #include <tuple>
10 | #include <vector>
11 | 
12 | using namespace std::chrono;
13 | 
14 | struct work_unit {  // one generated request; filled positionally by GenerateWork below
15 |   double start_us, work_us, duration_us;  // GenerateWork stores the arrival time, the service sample, and 0
16 |   int cpu;  // the cpu value passed to GenerateWork
17 | };
18 | 
19 | // One work_unit per a() step from cur_us until the time reaches last_us.
20 | template <class Arrival, class Service>
21 | std::vector<work_unit> GenerateWork(Arrival a, Service s, double cur_us,
22 |                                     double last_us, int cpu) {
23 |   std::vector<work_unit> out;
24 |   // Braced init evaluates left to right: a() advances the clock before s().
25 |   while (cur_us < last_us)
26 |     out.push_back(work_unit{cur_us += a(), s(), 0, cpu});
27 |   return out;
28 | }
29 | 
30 | // Same as the by-value overload, but samples service times through *s.
31 | template <class Arrival, class Service>
32 | std::vector<work_unit> GenerateWork(Arrival a, Service *s, double cur_us,
33 |                                     double last_us, int cpu) {
34 |   std::vector<work_unit> out;
35 |   // Braced init evaluates left to right: a() advances the clock before (*s)().
36 |   while (cur_us < last_us)
37 |     out.push_back(work_unit{cur_us += a(), (*s)(), 0, cpu});
38 |   return out;
39 | }
40 | 
41 | std::vector<std::string> split(const std::string &text, char sep);
42 | 
43 | // Stopwatch over steady_clock; deadlines are microseconds since the start.
44 | class Timer {
45 |  public:
46 |   using micro = duration<double, std::micro>;
47 | 
48 |   // Starts timing at construction.
49 |   Timer() { Reset(); }
50 |   ~Timer() {}
51 | 
52 |   // Restarts the timer: the start time becomes the current time.
53 |   void Reset() {
54 |     // barrier() brackets the clock read (presumably a compiler barrier).
55 |     barrier();
56 |     start_ts_ = steady_clock::now();
57 |     barrier();
58 |   }
59 | 
60 |   // Returns the microseconds elapsed since construction or the last Reset().
61 |   double Elapsed() {
62 |     barrier();
63 |     const time_point<steady_clock> now = steady_clock::now();
64 |     barrier();
65 |     const micro delta = duration_cast<micro>(now - start_ts_);
66 |     return delta.count();
67 |   }
68 | 
69 |   // Busy-waits, calling cpu_relax(), until Elapsed() reaches the deadline.
70 |   void SpinUntil(double deadline) {
71 |     while (Elapsed() < deadline) cpu_relax();
72 |   }
73 | 
74 |   // Sleeps for the time remaining until the deadline; no-op if it has passed.
75 |   void SleepUntil(double deadline) {
76 |     const double remaining = deadline - Elapsed();
77 |     if (remaining <= 0) return;
78 |     rt::Sleep(static_cast<uint64_t>(remaining));
79 |   }
80 | 
81 |  private:
82 |   time_point<steady_clock> start_ts_;
83 | };
84 | 


--------------------------------------------------------------------------------
/apps/spdk_perf/.gitignore:
--------------------------------------------------------------------------------
1 | perf
2 | 


--------------------------------------------------------------------------------
/apps/spdk_perf/Makefile:
--------------------------------------------------------------------------------
 1 | #
 2 | #  BSD LICENSE
 3 | #
 4 | #  Copyright (c) Intel Corporation.
 5 | #  All rights reserved.
 6 | #
 7 | #  Redistribution and use in source and binary forms, with or without
 8 | #  modification, are permitted provided that the following conditions
 9 | #  are met:
10 | #
11 | #    * Redistributions of source code must retain the above copyright
12 | #      notice, this list of conditions and the following disclaimer.
13 | #    * Redistributions in binary form must reproduce the above copyright
14 | #      notice, this list of conditions and the following disclaimer in
15 | #      the documentation and/or other materials provided with the
16 | #      distribution.
17 | #    * Neither the name of Intel Corporation nor the names of its
18 | #      contributors may be used to endorse or promote products derived
19 | #      from this software without specific prior written permission.
20 | #
21 | #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | #  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 | #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 | #  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 | #  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 | #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 | #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 | #  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 | #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 | #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | #
33 | 
34 | SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../spdk)
35 | include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
36 | 
37 | APP = perf
38 | 
39 | include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk
40 | 
41 | CFLAGS += -I$(SPDK_ROOT_DIR)/dpdk/build/include
42 | LIBS += -lm
43 | 


--------------------------------------------------------------------------------
/apps/spdk_perf/README.md:
--------------------------------------------------------------------------------
1 | Copied from https://github.com/anakli/spdk_perf and modified to use a newer SPDK version.
2 | 
3 | To build, run make.
4 | 


--------------------------------------------------------------------------------
/apps/spdk_perf/run_perf.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Script to run perf tests
 4 | # June 2016
 5 | 
 6 | # Parameters:
 7 | # 	$1 = output_filename
 8 | 
 9 | # Every step below writes to $1, so stop unless exactly one argument is given.
10 | if [ $# -ne 1 ]
11 |   then
12 |     echo "Usage: ./run_perf.sh [output_filename]"
13 |     exit 1
14 | fi
15 | 
16 | # Create output file and set permissions for root to write
17 | touch "$1"
18 | chmod o+w "$1"
19 | 
20 | printf "Workload; Read Ratio; Num cores; Max Qdepth; Req Size; Target IOPS; Rd IOPS; Wr IOPS; Rd Avg; Rd p95; Rd p99; Rd p99.9; Rd p99.99; Wr Avg; Wr p95; Wr p99; Wr p99.9; Wr p99.99; Total p99.9; #dropped \n" >> "$1"
21 | 
22 | 
23 | # sweep request sizes
24 | for s in 4096 # 1024 8192 16384 32768 65536
25 | do
26 | 	# sweep read/write ratios
27 | 	for m in 0 50 100 #100 75 50 25 0 # 99 95 90 85 80 75 70 60 50 40 30 20 10 0
28 | 	do
29 | 		# sweep target IOPS
30 | 		# note lambda is the target IOPS *per core*
31 | 		# total target IOPS is lambda times the num cores, specified via coremask parameter
32 | 		for lambda in `seq 20000 20000 700000`
33 | 		do
34 | 			printf "randrw-openloop-exp; %d; 1; 2122000; %d; %d;" "$m"  "$s" "$lambda" >> "$1"
35 | 			#sudo ./perf -t 120 -s 4096 -q 1024 -w randrw -M $m -c 1 -o $1 -L $lambda
36 | 			# note: -c is the coremask in hex
37 | 		    #       -c 1 means use a single core
38 | 		    # 	    -c 3 means use 2 cores
39 | 		    #       -c f means use 4 cores
40 | 			#		keep in mind, to achieve high IOPS, may need more than 1 core
41 | 			sudo timeout 30 ./perf -t 10 -s $s -q 2122000 -w randrw -M $m -c 0x1 -o "$1" -L $lambda
42 | 		done
43 | 		printf "\n" >> "$1"
44 | 	done
45 | done
46 | 


--------------------------------------------------------------------------------
/apps/storage_service/.gitignore:
--------------------------------------------------------------------------------
1 | storage_server
2 | 


--------------------------------------------------------------------------------
/apps/storage_service/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for storage_service
 2 | ROOT_PATH=../..
 3 | include $(ROOT_PATH)/build/shared.mk
 4 | 
 5 | librt_libs = $(ROOT_PATH)/bindings/cc/librt++.a
 6 | INC += -I$(ROOT_PATH)/bindings/cc
 7 | 
 8 | storage_server_src = storage_server.cc
 9 | storage_server_obj = $(storage_server_src:.cc=.o)
10 | 
11 | 
12 | INC += -Isnappy/ -Isnappy/build/
13 | LIBS += snappy/build/libsnappy.a -lcrypto
14 | 
15 | src = $(storage_server_src)
16 | obj = $(src:.cc=.o)
17 | dep = $(obj:.o=.d)
18 | 
19 | # must be first
20 | all: storage_server
21 | 
22 | storage_server: $(storage_server_obj) $(librt_libs) $(RUNTIME_DEPS)
23 | 	$(LDXX) -o $@ $(LDFLAGS) ../../shim/libshim.a -ldl  $(obj) $(librt_libs) $(RUNTIME_LIBS) $(LIBS)
24 | 
25 | ifneq ($(MAKECMDGOALS),clean)
26 | -include $(dep)   # include all dep files in the makefile
27 | endif
28 | 
29 | # rule to generate a dep file by using the C preprocessor
30 | # (see man cpp for details on the -MM and -MT options)
31 | %.d: %.cc
32 | 	@$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@
33 | %.o: %.cc
34 | 	$(CXX) $(CXXFLAGS) -c $< -o $@
35 | 
36 | .PHONY: clean
37 | clean:
38 | 	rm -f $(obj) $(dep) storage_server
39 | 


--------------------------------------------------------------------------------
/apps/storage_service/reflex.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2015-2017, Stanford University
 3 |  *
 4 |  * All rights reserved.
 5 |  *
 6 |  * Redistribution and use in source and binary forms, with or without
 7 |  * modification, are permitted provided that the following conditions are met:
 8 |  *
 9 |  *  * Redistributions of source code must retain the above copyright notice,
10 |  *    this list of conditions and the following disclaimer.
11 |  *
12 |  *  * Redistributions in binary form must reproduce the above copyright notice,
13 |  *    this list of conditions and the following disclaimer in the documentation
14 |  *    and/or other materials provided with the distribution.
15 |  *
16 |  *  * Neither the name of the copyright holder nor the names of its
17 |  *    contributors may be used to endorse or promote products derived from
18 |  *    this software without specific prior written permission.
19 |  *
20 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 |  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 |  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 |  * POSSIBILITY OF SUCH DAMAGE.
31 |  */
32 | 
33 | enum msg_type {  /* message kinds; enumerators are implicitly numbered from 0 */
34 |   PUT,  /* 0 */
35 |   GET,  /* 1 */
36 |   PUT_ACK,  /* 2; NOTE(review): presumably acknowledges a PUT - confirm */
37 |   GET_RESP,  /* 3; NOTE(review): presumably the reply to a GET - confirm */
38 | };
39 | 
40 | struct msg_header {  /* NOTE(review): no user of this struct is visible in this header; confirm field meanings against storage_server.cc */
41 |   void* addr;
42 |   int cmd;  /* presumably an enum msg_type value - TODO confirm */
43 |   size_t len;
44 |   int tag;
45 | };
46 | 
47 | /*
48 |  * ReFlex protocol support
49 |  */
50 | 
51 | #define CMD_GET 0x00
52 | #define CMD_SET 0x01
53 | #define CMD_SET_NO_ACK 0x02
54 | 
55 | #define RESP_OK 0x00
56 | #define RESP_EINVAL 0x04
57 | 
58 | #define REQ_PKT 0x80
59 | #define RESP_PKT 0x81
60 | #define MAX_EXTRA_LEN 8
61 | #define MAX_KEY_LEN 8
62 | 
63 | typedef struct __attribute__((__packed__)) {  /* packed: members are laid out without padding */
64 |   uint16_t magic;  /* NOTE(review): presumably REQ_PKT or RESP_PKT - confirm */
65 |   uint16_t opcode;  /* NOTE(review): presumably a CMD_* or RESP_* value - confirm */
66 |   void* req_handle;
67 |   unsigned long lba;  /* NOTE(review): the width of unsigned long depends on the ABI; confirm both ends agree */
68 |   unsigned int lba_count;
69 |   uint64_t tsc;
70 | } binary_header_blk_t;
71 | 


--------------------------------------------------------------------------------
/apps/storage_service/snappy.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e  # stop at the first command that fails
 4 | 
 5 | git submodule update --init --recursive  # fetch submodules (presumably including ./snappy)
 6 | 
 7 | pushd snappy
 8 | rm -rf build  # always start from an empty build directory
 9 | mkdir build
10 | pushd build
11 | 
12 | cmake -DSNAPPY_BUILD_TESTS=0 -DCMAKE_BUILD_TYPE=Release ..  # release build with snappy's tests switched off
13 | make -j  # NOTE(review): -j without a number places no limit on parallel jobs
14 | 
15 | popd
16 | popd
17 | 


--------------------------------------------------------------------------------
/apps/stream/.gitignore:
--------------------------------------------------------------------------------
1 | stream
2 | stream_linux
3 | stream_query
4 | 


--------------------------------------------------------------------------------
/apps/stream/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for stream
 2 | ROOT_PATH=../..
 3 | include $(ROOT_PATH)/build/shared.mk
 4 | 
 5 | stream_src = stream.cc
 6 | stream_obj = $(stream_src:.cc=.o)
 7 | 
 8 | stream_query_src = stream_query.cc
 9 | stream_query_obj = $(stream_query_src:.cc=.o)
10 | 
11 | lib_shim = $(ROOT_PATH)/shim/libshim.a -ldl
12 | 
13 | librt_libs = $(ROOT_PATH)/bindings/cc/librt++.a
14 | INC += -I$(ROOT_PATH)/bindings/cc
15 | 
16 | RUNTIME_LIBS := $(RUNTIME_LIBS)
17 | 
18 | # must be first
19 | all: stream stream_query stream_linux
20 | 
21 | stream: $(stream_obj) $(librt_libs) $(RUNTIME_DEPS)
22 | 	$(LDXX) -o $@ $(LDFLAGS) $(stream_obj) \
23 | 	-Wl,--wrap=main $(lib_shim) $(librt_libs) $(RUNTIME_LIBS)
24 | 
25 | stream_linux: $(stream_obj)
26 | 	$(LDXX) -o $@ $(LDFLAGS) $(stream_obj) -lpthread
27 | 
28 | stream_query: $(stream_query_obj) $(librt_libs) $(RUNTIME_DEPS)
29 | 	$(LDXX) -o $@ $(LDFLAGS) $(stream_query_obj) \
30 | 	$(librt_libs) $(RUNTIME_LIBS)
31 | 
32 | # general build rules for all targets
33 | src = $(stream_src) $(stream_query_src)
34 | obj = $(src:.cc=.o)
35 | dep = $(obj:.o=.d)
36 | 
37 | ifneq ($(MAKECMDGOALS),clean)
38 | -include $(dep)   # include all dep files in the makefile
39 | endif
40 | 
41 | # rule to generate a dep file by using the C preprocessor
42 | # (see man cpp for details on the -MM and -MT options)
43 | %.d: %.cc
44 | 	@$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@
45 | %.o: %.cc
46 | 	$(CXX) $(CXXFLAGS) -c $< -o $@
47 | 
48 | .PHONY: clean
49 | clean:
50 | 	rm -f $(obj) $(dep) stream stream_query stream_linux
51 | 


--------------------------------------------------------------------------------
/apps/stream/stream_query.cc:
--------------------------------------------------------------------------------
 1 | extern "C" {
 2 | #include <base/log.h>
 3 | }
 4 | 
 5 | #include "runtime.h"
 6 | #include "sync.h"
 7 | #include "thread.h"
 8 | #include "timer.h"
 9 | 
10 | #include <chrono>
11 | #include <iostream>
12 | #include <sys/shm.h>
13 | #include <unistd.h>
14 | 
15 | barrier_t barrier;
16 | 
17 | #define SHM_KEY (0x123)
18 | #define CACHELINE 64
19 | 
20 | // Attaches to the counters in SysV shared memory and logs their rate forever.
21 | int main(int argc, char *argv[]) {
22 | 	if (argc != 2) {
23 | 		std::cerr << "usage: [#threads]" << std::endl;
24 | 		return -EINVAL;
25 | 	}
26 | 	int threads = std::stoi(argv[1], nullptr, 0);
27 | 	if (threads <= 0) {  // a negative count would wrap the segment size
28 | 		std::cerr << "#threads must be a positive integer" << std::endl;
29 | 		return -EINVAL;
30 | 	}
31 | 	int ret = base_init();
32 | 	if (ret) {
33 | 		fprintf(stderr, "failed to init base, ret = %d\n", ret);
34 | 		return ret;
35 | 	}
36 | 
37 | 	// One double per thread, each at the start of its own CACHELINE-byte slot.
38 | 	int shmid = shmget((key_t)SHM_KEY, CACHELINE * threads, 0666 | IPC_CREAT);
39 | 	void *shm = shmid < 0 ? (void *)-1 : shmat(shmid, 0, 0);
40 | 	if (shm == (void *)-1) {  // shmget() or shmat() failed
41 | 		fprintf(stderr, "failed to attach shared memory\n");
42 | 		return 1;
43 | 	}
44 | 	volatile double *cnt = (volatile double *)shm;
45 | 	uint64_t last_total = 0;
46 | 	auto last = std::chrono::steady_clock::now();
47 | 	while (1) {
48 | 		sleep(1);
49 | 		auto now = std::chrono::steady_clock::now();
50 | 		uint64_t total = 0;
51 | 		double duration = std::chrono::duration<double>(now - last).count();
52 | 		for (int i = 0; i < threads; i++)
53 | 			total += cnt[i * CACHELINE / sizeof(double)];
54 | 		log_info("mops: %lf, timestamp: %lu",
55 | 			 static_cast<double>(total - last_total) / 1E6 / duration,
56 | 			 (unsigned long)time(NULL));
57 | 		last_total = total;
58 | 		last = now;
59 | 	}
60 | }
61 | 


--------------------------------------------------------------------------------
/apps/streamcluster/COPYRIGHT:
--------------------------------------------------------------------------------
1 | Copyright (c) 2007 Princeton University
2 | Authors: Jia Deng
3 |          Christian Bienia
4 | 


--------------------------------------------------------------------------------
/apps/streamcluster/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for streamcluster (the sources in this directory end in .cpp)
 2 | ROOT_PATH=../..
 3 | include $(ROOT_PATH)/build/shared.mk
 4 | 
 5 | streamcluster_src = streamcluster.cpp
 6 | streamcluster_obj = $(streamcluster_src:.cpp=.o)
 7 | 
 8 | lib_shim = $(ROOT_PATH)/shim/libshim.a -ldl
 9 | 
10 | librt_libs = $(ROOT_PATH)/bindings/cc/librt++.a
11 | INC += -I$(ROOT_PATH)/bindings/cc
12 | 
13 | CXXFLAGS += -DENABLE_THREADS -march=native
14 | LDFLAGS += -Wl,--wrap=main -no-pie
15 | 
16 | RUNTIME_LIBS := $(RUNTIME_LIBS)
17 | 
18 | # must be first
19 | all: streamcluster
20 | 
21 | streamcluster: $(streamcluster_obj) $(librt_libs) $(RUNTIME_DEPS)
22 | 	$(LDXX) -o $@ $(LDFLAGS) $(streamcluster_obj) \
23 | 	$(lib_shim) $(librt_libs) $(RUNTIME_LIBS)
24 | 
25 | # general build rules for all targets
26 | src = $(streamcluster_src)
27 | obj = $(src:.cpp=.o)
28 | dep = $(obj:.o=.d)
29 | 
30 | ifneq ($(MAKECMDGOALS),clean)
31 | -include $(dep)   # include all dep files in the makefile
32 | endif
33 | 
34 | # rule to generate a dep file by using the C preprocessor
35 | # (see man cpp for details on the -MM and -MT options)
36 | %.d: %.cpp
37 | 	@$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@
38 | %.o: %.cpp
39 | 	$(CXX) $(CXXFLAGS) -c $< -o $@
40 | 
41 | .PHONY: clean
42 | clean:
43 | 	rm -f $(obj) $(dep) streamcluster
44 | 


--------------------------------------------------------------------------------
/apps/synthetic/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | Cargo.lock
3 | 


--------------------------------------------------------------------------------
/apps/synthetic/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "synthetic"
 3 | version = "0.1.1"
 4 | authors = ["The Shenango Developers"]
 5 | edition = "2021"
 6 | 
 7 | [dependencies]
 8 | arrayvec = "0.7.2"
 9 | byteorder = "1.5"
10 | clap = "2.32.0"
11 | dns-parser = "0.7.1"
12 | hostname = "0.1.4"
13 | itertools = "0.8.0"
14 | libc = "0.2"
15 | rand = "0.7.3"
16 | rand_distr = "0.3.0"
17 | rand_mt = "3"
18 | serde = "1.0"
19 | net2 = "0.2"
20 | mersenne_twister = "1.1.1"
21 | shenango = { path = "../../bindings/rust" }
22 | 
23 | [profile.release]
24 | debug=false
25 | panic = "abort"
26 | 
27 | [workspace]
28 | 


--------------------------------------------------------------------------------
/apps/synthetic/build.rs:
--------------------------------------------------------------------------------
 1 | use std::path::PathBuf;
 2 | 
 3 | // Build script entry point: tells cargo to pass the repository's linker
 4 | // script (base/base.ld) to the linker with `-T`.
 5 | fn main() {
 6 |     // Cargo sets CARGO_MANIFEST_DIR for build scripts; panic if it is absent.
 7 |     let manifest_dir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap());
 8 | 
 9 |     // manifest_dir is .../caladan/apps/synthetic, so the repository root is
10 |     // its grandparent: ancestors() yields self, parent, grandparent, ...
11 |     let repo_root = manifest_dir
12 |         .ancestors()
13 |         .nth(2)
14 |         .unwrap();
15 | 
16 |     let link_script = repo_root.join("base/base.ld");
17 | 
18 |     println!("cargo:rustc-link-arg=-T{}", link_script.to_str().unwrap());
19 | }
20 | 


--------------------------------------------------------------------------------
/apps/synthetic/rust-toolchain.toml:
--------------------------------------------------------------------------------
1 | [toolchain]
2 | channel = "nightly"
3 | targets = [ "x86_64-unknown-linux-gnu" ]
4 | 


--------------------------------------------------------------------------------
/apps/synthetic/src/distribution.rs:
--------------------------------------------------------------------------------
 1 | use rand::Rng;
 2 | use rand_distr::{Distribution as DistR, Exp};
 3 | 
 4 | #[derive(Copy, Clone, Debug)]
 5 | pub enum Distribution {
 6 |     Zero,
 7 |     Constant(u64),
 8 |     Exponential(f64),
 9 |     Bimodal(f64, u64, u64),
10 | }
11 | 
12 | impl Distribution {
13 |     /// Lower-case name of the variant, matching the names `create` accepts.
14 |     pub fn name(&self) -> &'static str {
15 |         match *self {
16 |             Distribution::Zero => "zero",
17 |             Distribution::Constant(_) => "constant",
18 |             Distribution::Exponential(_) => "exponential",
19 |             Distribution::Bimodal(_, _, _) => "bimodal",
20 |         }
21 |     }
22 | 
23 |     /// Draws one value: 0, the constant, an exponential sample with mean `m`
24 |     /// truncated to u64, or `v1` with probability `p` and `v2` otherwise.
25 |     pub fn sample<R: Rng>(&self, rng: &mut R) -> u64 {
26 |         match *self {
27 |             Distribution::Zero => 0,
28 |             Distribution::Constant(m) => m,
29 |             Distribution::Exponential(m) => Exp::new(1.0 / m).unwrap().sample(rng) as u64,
30 |             Distribution::Bimodal(p, v1, _) if rng.gen_bool(p) => v1,
31 |             Distribution::Bimodal(_, _, v2) => v2,
32 |         }
33 |     }
34 | 
35 |     /// Parses `zero`, `constant:<u64>`, `exponential:<mean>` or
36 |     /// `bimodal:<p>:<v1>:<v2>`. Malformed or out-of-range parameters yield
37 |     /// `Err` instead of panicking here or later in `sample`.
38 |     pub fn create(spec: &str) -> Result<Self, &str> {
39 |         let tokens: Vec<&str> = spec.split(':').collect();
40 |         match tokens.as_slice() {
41 |             ["zero", ..] => Ok(Distribution::Zero),
42 |             ["constant", v] => match v.parse::<u64>() {
43 |                 Ok(val) => Ok(Distribution::Constant(val)),
44 |                 Err(_) => Err("bad constant distribution parameter"),
45 |             },
46 |             // sample() unwraps Exp::new(1.0 / mean), which needs a positive rate
47 |             ["exponential", m] => match m.parse::<f64>() {
48 |                 Ok(mean) if 1.0 / mean > 0.0 => Ok(Distribution::Exponential(mean)),
49 |                 _ => Err("bad exponential distribution parameter"),
50 |             },
51 |             // sample() calls gen_bool(p), which panics unless 0 <= p <= 1
52 |             ["bimodal", p, a, b] => match (p.parse::<f64>(), a.parse::<u64>(), b.parse::<u64>()) {
53 |                 (Ok(p), Ok(v1), Ok(v2)) if (0.0..=1.0).contains(&p) => {
54 |                     Ok(Distribution::Bimodal(p, v1, v2))
55 |                 }
56 |                 _ => Err("bad bimodal distribution parameters"),
57 |             },
58 |             // unknown names and wrong parameter counts both end up here
59 |             _ => Err("bad distribution spec"),
60 |         }
61 |     }
62 | }
62 | 


--------------------------------------------------------------------------------
/apps/synthetic/src/lockstep.rs:
--------------------------------------------------------------------------------
 1 | extern crate hostname;
 2 | 
 3 | use std::io::{self, Read, Write};
 4 | use std::net::SocketAddrV4;
 5 | use std::time::Duration;
 6 | 
 7 | use crate::Backend;
 8 | use crate::Connection;
 9 | 
10 | /// One side of a lockstep group (a server with its clients, or one client).
11 | pub enum Group {
12 |     Server(Vec<Connection>),
13 |     Client(Connection),
14 | }
15 | 
16 | impl Group {
17 |     /// Listens on `addr` and accepts exactly `num_clients` connections.
18 |     pub fn new_server(
19 |         num_clients: usize,
20 |         addr: SocketAddrV4,
21 |         backend: Backend,
22 |     ) -> io::Result<Self> {
23 |         let listener = backend.create_tcp_listener(addr)?;
24 |         let mut accepted = Vec::new();
25 |         while accepted.len() < num_clients {
26 |             accepted.push(listener.accept()?);
27 |         }
28 |         Ok(Group::Server(accepted))
29 |     }
30 | 
31 |     /// Five attempts with a 50 ms sleep after each failure, then a final one.
32 |     pub fn new_client(addr: SocketAddrV4, backend: Backend) -> io::Result<Self> {
33 |         let mut retries = 5;
34 |         while retries > 0 {
35 |             if let Ok(stream) = backend.create_tcp_connection(None, addr) {
36 |                 return Ok(Group::Client(stream));
37 |             }
38 |             backend.sleep(Duration::from_millis(50));
39 |             retries -= 1;
40 |         }
41 |         Ok(Group::Client(backend.create_tcp_connection(None, addr)?))
42 |     }
43 | 
44 |     /// Clients send a byte and await a reply; the server reads all, then replies.
45 |     pub fn barrier(&mut self) {
46 |         let mut token = [0u8; 1];
47 |         match self {
48 |             Group::Client(stream) => {
49 |                 stream.write_all(&token).unwrap();
50 |                 stream.read_exact(&mut token).unwrap();
51 |             }
52 |             Group::Server(clients) => {
53 |                 clients.iter_mut().for_each(|c| c.read_exact(&mut token).unwrap());
54 |                 token[0] = 0;
55 |                 clients.iter_mut().for_each(|c| c.write_all(&token).unwrap());
56 |             }
57 |         }
58 |     }
59 | }
60 | 


--------------------------------------------------------------------------------
/apps/synthetic/src/payload.rs:
--------------------------------------------------------------------------------
 1 | use crate::Buffer;
 2 | use crate::Connection;
 3 | use crate::LoadgenProtocol;
 4 | use crate::Packet;
 5 | use crate::Transport;
 6 | 
 7 | use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
 8 | use std::io;
 9 | use std::io::Read;
10 | 
11 | pub struct Payload {  // serialized as three big-endian u64s in this field order (see serialize_into)
12 |     pub work_iterations: u64,
13 |     pub index: u64,  // gen_req stores the request number here; read_response returns it
14 |     pub randomness: u64,
15 | }
16 | 
17 | pub const PAYLOAD_SIZE: usize = 24;
18 | 
19 | #[derive(Clone, Copy)]
20 | pub struct SyntheticProtocol {}
21 | 
22 | impl LoadgenProtocol for SyntheticProtocol {
23 |     // Always false for this protocol.
24 |     fn uses_ordered_requests(&self) -> bool { false }
25 | 
26 |     // Appends the serialized payload for request `i` to `buf`.
27 |     fn gen_req(&self, i: usize, p: &Packet, buf: &mut Vec<u8>) {
28 |         let request = Payload {
29 |             work_iterations: p.work_iterations,
30 |             index: i as u64,
31 |             randomness: p.randomness,
32 |         };
33 |         request.serialize_into(buf).unwrap();
34 |     }
35 | 
36 |     // Reads one PAYLOAD_SIZE-byte reply; returns its (index, randomness).
37 |     fn read_response(&self, mut sock: &Connection, buf: &mut Buffer) -> io::Result<(usize, u64)> {
38 |         let scratch = buf.get_empty_buf();
39 |         sock.read_exact(&mut scratch[..PAYLOAD_SIZE])?;
40 |         let reply = Payload::deserialize(&mut &scratch[..])?;
41 |         let Payload { index, randomness, .. } = reply;
42 |         Ok((index as usize, randomness))
43 |     }
44 | }
44 | 
45 | impl SyntheticProtocol {
46 |     pub fn with_args(_matches: &clap::ArgMatches, _tport: Transport) -> Self {
47 |         Self {} // no options: both arguments are ignored
48 |     }
49 | 
50 |     pub fn args<'a, 'b>() -> Vec<clap::Arg<'a, 'b>> {
51 |         Vec::new() // no extra command-line arguments
52 |     }
53 | }
54 | 
55 | impl Payload {
56 |     // Writes the three fields, in declaration order, as big-endian u64s.
57 |     pub fn serialize_into<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
58 |         for word in [self.work_iterations, self.index, self.randomness] {
59 |             writer.write_u64::<BigEndian>(word)?;
60 |         }
61 |         Ok(())
62 |     }
63 | 
64 |     // Reads the fields back in the order serialize_into wrote them.
65 |     pub fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Payload> {
66 |         let work_iterations = reader.read_u64::<BigEndian>()?;
67 |         let index = reader.read_u64::<BigEndian>()?;
68 |         let randomness = reader.read_u64::<BigEndian>()?;
69 |         Ok(Payload { work_iterations, index, randomness })
70 |     }
71 | }
72 | 


--------------------------------------------------------------------------------
/base/base.ld:
--------------------------------------------------------------------------------
 1 | SECTIONS
 2 | {
 3 |   .perthread (NOLOAD) : {
 4 |     PROVIDE(__perthread_start = .);
 5 |     *(.perthread)
 6 |     PROVIDE(__perthread_end = .);
 7 |   }
 8 | }
 9 | INSERT AFTER .text
10 | 


--------------------------------------------------------------------------------
/base/fd_transfer.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <base/fd_transfer.h>
 3 | #include <base/log.h>
 4 | #include <base/stddef.h>
 5 | 
 6 | #include <sys/socket.h>
 7 | 
 8 | /* recv_fd - stores an SCM_RIGHTS descriptor received on @fd in @fd_out; 0 or <0 */
 9 | int recv_fd(int fd, int *fd_out)
10 | {
11 | 	struct msghdr msg = {0};
12 | 	/* the union gives the buffer struct cmsghdr alignment, see cmsg(3) */
13 | 	union {
14 | 		char buf[CMSG_SPACE(sizeof(int))];
15 | 		struct cmsghdr align;
16 | 	} ctl;
17 | 	char iobuf[1];
18 | 	struct iovec iov = { .iov_base = iobuf, .iov_len = sizeof(iobuf) };
19 | 	struct cmsghdr *cmptr;
20 | 	ssize_t ret;
21 | 
22 | 	/* init message header and buffs for control message and iovec */
23 | 	msg.msg_control = ctl.buf;
24 | 	msg.msg_controllen = sizeof(ctl.buf);
25 | 	msg.msg_iov = &iov;
26 | 	msg.msg_iovlen = 1;
27 | 
28 | 	ret = recvmsg(fd, &msg, 0);
29 | 	if (ret < 0) {
30 | 		log_debug("control: error with recvmsg %ld", ret);
31 | 		return ret;
32 | 	}
33 | 
34 | 	/* check validity of control message */
35 | 	cmptr = CMSG_FIRSTHDR(&msg);
36 | 	if (cmptr == NULL) {
37 | 		log_debug("control: no cmsg %p", cmptr);
38 | 		return -1;
39 | 	} else if (cmptr->cmsg_len != CMSG_LEN(sizeof(int))) {
40 | 		/* the test is for any mismatch, so report the length as such */
41 | 		log_debug("control: unexpected cmsg length %zu",
42 | 			  (size_t)cmptr->cmsg_len);
43 | 		return -1;
44 | 	} else if (cmptr->cmsg_level != SOL_SOCKET) {
45 | 		log_debug("control: unrecognized cmsg level %d", cmptr->cmsg_level);
46 | 		return -1;
47 | 	} else if (cmptr->cmsg_type != SCM_RIGHTS) {
48 | 		log_debug("control: unrecognized cmsg type %d", cmptr->cmsg_type);
49 | 		return -1;
50 | 	}
51 | 	*fd_out = *(int *)CMSG_DATA(cmptr);
52 | 	return 0;
53 | }
53 | 
54 | /* send_fd - passes @shared_fd over @controlfd via SCM_RIGHTS; 0 or -1 */
55 | int send_fd(int controlfd, int shared_fd)
56 | {
57 | 	struct msghdr msg = {0};
58 | 	/* zeroed and aligned for struct cmsghdr, see cmsg(3) */
59 | 	union {
60 | 		char buf[CMSG_SPACE(sizeof(int))];
61 | 		struct cmsghdr align;
62 | 	} ctl = {0};
63 | 	char iobuf[1] = {0};	/* dummy byte: the iovec is necessary but unused */
64 | 	struct iovec iov = { .iov_base = iobuf, .iov_len = sizeof(iobuf) };
65 | 	struct cmsghdr *cmptr;
66 | 
67 | 	msg.msg_control = ctl.buf;
68 | 	msg.msg_controllen = sizeof(ctl.buf);
69 | 	msg.msg_iov = &iov;
70 | 	msg.msg_iovlen = 1;
71 | 
72 | 	/* init control message */
73 | 	cmptr = CMSG_FIRSTHDR(&msg);
74 | 	cmptr->cmsg_len = CMSG_LEN(sizeof(int));
75 | 	cmptr->cmsg_level = SOL_SOCKET;
76 | 	cmptr->cmsg_type = SCM_RIGHTS;
77 | 	*(int *)CMSG_DATA(cmptr) = shared_fd;
78 | 
79 | 	if (sendmsg(controlfd, &msg, 0) != sizeof(iobuf)) {
80 | 		log_err("failed to send cmsg");
81 | 		return -1;
82 | 	}
83 | 
84 | 	return 0;
85 | }


--------------------------------------------------------------------------------
/base/init.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * init.c - support for initialization
 3 |  */
 4 | 
 5 | #include <stdlib.h>
 6 | 
 7 | #include <base/init.h>
 8 | #include <base/log.h>
 9 | #include <base/thread.h>
10 | 
11 | #include "init_internal.h"
12 | 
13 | bool base_init_done __aligned(CACHE_LINE_SIZE);
14 | 
15 | void __weak init_shutdown(int status)
16 | {
17 | 	log_info("init: shutting down -> %s",
18 | 		 status == EXIT_SUCCESS ? "SUCCESS" : "FAILURE");
19 | 	exit(status);
20 | }
21 | 
/*
 * The early subsystems are brought up by hand, in this order:
 * cpu, time, page, slab. The first failure is returned to the caller.
 */
static int init_internal(void)
{
	int ret = cpu_init();

	if (!ret)
		ret = time_init();
	if (ret)
		return ret;

	ret = page_init();
	if (ret) {
		log_err("Could not intialize memory. Please ensure that hugepages are "
			    "enabled/available.");
		return ret;
	}

	return slab_init();
}
44 | 
45 | 
46 | extern int thread_init_perthread(void);
47 | 
48 | /**
49 |  * base_init - initializes the base library
50 |  *
51 |  * Call this function before using the library.
52 |  * Returns 0 if successful, otherwise fail.
53 |  */
54 | int base_init(void)
55 | {
56 | 	int ret;
57 | 
58 | 	ret = thread_init_perthread();
59 | 	if (ret)
60 | 		return ret;
61 | 
62 | 	ret = init_internal();
63 | 	if (ret)
64 | 		return ret;
65 | 
66 | 	base_init_done = true;
67 | 	return 0;
68 | }
69 | 
/* runs the per-thread initializers of the early subsystems */
static int init_thread_internal(void)
{
	int ret = page_init_thread();

	return ret;
}
74 | 
75 | /**
76 |  * base_init_thread - prepares a thread for use by the base library
77 |  *
78 |  * Returns 0 if successful, otherwise fail.
79 |  */
80 | int base_init_thread(void)
81 | {
82 | 	int ret;
83 | 
84 | 	ret = thread_init_perthread();
85 | 	if (ret)
86 | 		return ret;
87 | 
88 | 	ret = init_thread_internal();
89 | 	if (ret)
90 | 		return ret;
91 | 
92 | 	perthread_store(thread_init_done, true);
93 | 	return 0;
94 | }
95 | 
96 | 


--------------------------------------------------------------------------------
/base/init_internal.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * init_internal.h - internal base library initialization routines
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | /* internal base library initializers */
 8 | extern int cpu_init(void);
 9 | extern int time_init(void);
10 | extern int page_init(void);
11 | extern int slab_init(void);
12 | extern int smalloc_init(void);
13 | 
14 | /* internal base library per-thread initializers */
15 | extern int page_init_thread(void);
16 | extern int smalloc_init_thread(void);
17 | 


--------------------------------------------------------------------------------
/base/list.c:
--------------------------------------------------------------------------------
 1 | /* Licensed under BSD-MIT - see LICENSE file for details */
 2 | #include <base/stddef.h>
 3 | #include <base/list.h>
 4 | #include <base/log.h>
 5 | 
 6 | static void *corrupt(const char *abortstr,
 7 | 		     const struct list_node *head,
 8 | 		     const struct list_node *node,
 9 | 		     unsigned int count)
10 | {
11 | 	if (abortstr) {
12 | 		panic("%s: prev corrupt in node %p (%u) of %p\n",
13 | 		      abortstr, node, count, head);
14 | 	}
15 | 	return NULL;
16 | }
17 | 
18 | struct list_node *list_check_node(const struct list_node *node,
19 | 				  const char *abortstr)
20 | {
21 | 	const struct list_node *p, *n;
22 | 	int count = 0;
23 | 
24 | 	for (p = node, n = node->next; n != node; p = n, n = n->next) {
25 | 		count++;
26 | 		if (n->prev != p)
27 | 			return corrupt(abortstr, node, n, count);
28 | 	}
29 | 	/* Check prev on head node. */
30 | 	if (node->prev != p)
31 | 		return corrupt(abortstr, node, node, 0);
32 | 
33 | 	return (struct list_node *)node;
34 | }
35 | 
36 | struct list_head *list_check(const struct list_head *h, const char *abortstr)
37 | {
38 | 	if (!list_check_node(&h->n, abortstr))
39 | 		return NULL;
40 | 	return (struct list_head *)h;
41 | }
42 | 


--------------------------------------------------------------------------------
/base/log.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * log.c - the logging system
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | #include <string.h>
 7 | #include <stdarg.h>
 8 | #include <execinfo.h>
 9 | #include <sched.h>
10 | 
11 | #include <base/stddef.h>
12 | #include <base/log.h>
13 | #include <base/time.h>
14 | #include <asm/ops.h>
15 | 
16 | #define MAX_LOG_LEN 4096
17 | 
18 | /* log levels greater than this value won't be printed */
19 | int max_loglevel = LOG_DEBUG;
20 | 
21 | void logk(int level, const char *fmt, ...)
22 | {
23 | 	char buf[MAX_LOG_LEN];
24 | 	va_list ptr;
25 | 	off_t off;
26 | 	int cpu;
27 | 
28 | 	if (level > max_loglevel)
29 | 		return;
30 | 
31 | 	cpu = sched_getcpu();
32 | 
33 | 	if (likely(base_init_done)) {
34 | 		uint64_t us = microtime();
35 | 		sprintf(buf, "[%3d.%06d] CPU %02d| <%d> ",
36 | 			(int)(us / ONE_SECOND), (int)(us % ONE_SECOND),
37 | 			cpu, level);
38 | 	} else {
39 | 		sprintf(buf, "CPU %02d| <%d> ", cpu, level);
40 | 	}
41 | 
42 | 	off = strlen(buf);
43 | 	va_start(ptr, fmt);
44 | 	vsnprintf(buf + off, MAX_LOG_LEN - off, fmt, ptr);
45 | 	va_end(ptr);
46 | 	puts(buf);
47 | 
48 | 	if (level <= LOG_ERR)
49 | 		fflush(stdout);
50 | }
51 | 
#define MAX_CALL_DEPTH	256
/**
 * logk_backtrace - prints the current call stack to standard output
 *
 * At most MAX_CALL_DEPTH frames are reported. The symbols are written
 * straight to file descriptor 1, bypassing stdio, so pending buffered log
 * lines are flushed first to keep the output in order.
 */
void logk_backtrace(void)
{
	void *buf[MAX_CALL_DEPTH];
	const int calls = backtrace(buf, ARRAY_SIZE(buf));

	fflush(stdout);
	backtrace_symbols_fd(buf, calls, 1);
}
59 | 
60 | void logk_bug(bool fatal, const char *expr,
61 | 	      const char *file, int line, const char *func)
62 | {
63 | 	logk(LOG_EMERG, "%s: %s:%d ASSERTION '%s' FAILED IN '%s'",
64 | 	     fatal ? "FATAL" : "WARN", file, line, expr, func);
65 | 	logk_backtrace();
66 | 
67 | 	if (fatal)
68 | 		init_shutdown(EXIT_FAILURE);
69 | }
70 | 


--------------------------------------------------------------------------------
/base/lrpc.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * lrpc.c - shared memory communication channels
 3 |  */
 4 | 
 5 | #include <string.h>
 6 | 
 7 | #include <base/lrpc.h>
 8 | 
 9 | /* internal use only */
10 | bool __lrpc_send(struct lrpc_chan_out *chan, uint64_t cmd,
11 | 		 unsigned long payload)
12 | {
13 | 	struct lrpc_msg *dst;
14 | 
15 | 	assert(chan->send_head - chan->send_tail == chan->size);
16 | 
17 | 	chan->send_tail = load_acquire(chan->recv_head_wb);
18 |         if (chan->send_head - chan->send_tail == chan->size)
19 |                 return false;
20 | 
21 | 	dst = &chan->tbl[chan->send_head & (chan->size - 1)];
22 | 	dst->payload = payload;
23 | 
24 | 	cmd |= (chan->send_head++ & chan->size) ? 0 : LRPC_DONE_PARITY;
25 | 	store_release(&dst->cmd, cmd);
26 | 	return true;
27 | }
28 | 
29 | /**
30 |  * lrpc_init_out - initializes an egress shared memory channel
31 |  * @chan: the channel struct to initialize
32 |  * @tbl: a buffer to store channel messages
33 |  * @size: the number of message elements in the buffer
34 |  * @recv_head_wb: a pointer to the head position of the receiver
35 |  *
36 |  * returns 0 if successful, or -EINVAL if @size is not a power of two.
37 |  */
38 | int lrpc_init_out(struct lrpc_chan_out *chan, struct lrpc_msg *tbl,
39 | 		  unsigned int size, uint32_t *recv_head_wb)
40 | {
41 | 	if (!is_power_of_two(size))
42 | 		return -EINVAL;
43 | 
44 | 	memset(chan, 0, sizeof(*chan));
45 | 	chan->tbl = tbl;
46 | 	chan->size = size;
47 | 	chan->recv_head_wb = recv_head_wb;
48 | 	return 0;
49 | }
50 | 
51 | /**
52 |  * lrpc_init_in - initializes an ingress shared memory channel
53 |  * @chan: the channel struct to initialize
54 |  * @tbl: a buffer to store channel messages
55 |  * @size: the number of message elements in the buffer
56 |  * @recv_head_wb: a pointer to the head position of the receiver
57 |  *
58 |  * returns 0 if successful, or -EINVAL if @size is not a power of two.
59 |  */
60 | int lrpc_init_in(struct lrpc_chan_in *chan, struct lrpc_msg *tbl,
61 | 		 unsigned int size, uint32_t *recv_head_wb)
62 | {
63 | 	if (!is_power_of_two(size))
64 | 		return -EINVAL;
65 | 
66 | 	memset(chan, 0, sizeof(*chan));
67 | 	chan->tbl = tbl;
68 | 	chan->size = size;
69 | 	chan->recv_head_wb = recv_head_wb;
70 | 	return 0;
71 | }
72 | 


--------------------------------------------------------------------------------
/base/signal.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * signal.c - support for setting up signal handlers without using glibc
 3 |  */
 4 | 
 5 | #include <base/signal.h>
 6 | #include <base/syscall.h>
 7 | 
 8 | #include <asm/unistd_64.h>
 9 | #include <errno.h>
10 | #include <string.h>
11 | 
12 | #define SA_RESTORER 0x04000000
13 | 
/* copied from glibc sysdeps/unix/sysv/linux/kernel_sigaction.h */
/* this is the structure handed to the raw rt_sigaction system call below */
struct kernel_sigaction {
	__sighandler_t k_sa_handler;	/* copied from/to sigaction.sa_handler */
	unsigned long sa_flags;		/* callers always OR in SA_RESTORER */
	void (*sa_restorer) (void);	/* restorer function (see SA_RESTORER) */
	sigset_t sa_mask;		/* copied from/to sigaction.sa_mask */
};
21 | 
22 | /* allow user to specify sa_restorer */
23 | int base_sigaction_full(int sig, const struct sigaction *act,
24 |                         struct sigaction *oact)
25 | {
26 | 	long ret;
27 | 	struct kernel_sigaction kact, okact;
28 | 
29 | 	if (act) {
30 | 		kact.k_sa_handler = act->sa_handler;
31 | 		memcpy(&kact.sa_mask, &act->sa_mask, sizeof(sigset_t));
32 | 		kact.sa_flags = act->sa_flags | SA_RESTORER;
33 | 		kact.sa_restorer = act->sa_restorer;
34 | 	}
35 | 
36 | 	ret = syscall(__NR_rt_sigaction, sig, act ? &kact : NULL, oact ? &okact : NULL, 8);
37 | 
38 | 	if (ret < 0) {
39 | 		errno = -ret;
40 | 		return -1;
41 | 	}
42 | 
43 | 	if (oact) {
44 | 		oact->sa_handler = okact.k_sa_handler;
45 | 		memcpy(&oact->sa_mask, &okact.sa_mask, sizeof(sigset_t));
46 | 		oact->sa_flags = okact.sa_flags;
47 | 		oact->sa_restorer = okact.sa_restorer;
48 | 	}
49 | 
50 | 	return 0;
51 | }
52 | 
53 | 
54 | /* use our own sa_restorer instead of glibc's */
55 | int base_sigaction(int sig, const struct sigaction *act, struct sigaction *oact)
56 | {
57 | 	long ret;
58 | 	struct kernel_sigaction kact, okact;
59 | 
60 | 	if (act) {
61 | 		kact.k_sa_handler = act->sa_handler;
62 | 		memcpy(&kact.sa_mask, &act->sa_mask, sizeof(sigset_t));
63 | 		kact.sa_flags = act->sa_flags | SA_RESTORER;
64 | 		kact.sa_restorer = &syscall_rt_sigreturn;
65 | 	}
66 | 
67 | 	ret = syscall(__NR_rt_sigaction, sig, act ? &kact : NULL, oact ? &okact : NULL, 8);
68 | 
69 | 	if (ret < 0) {
70 | 		errno = -ret;
71 | 		return -1;
72 | 	}
73 | 
74 | 	if (oact) {
75 | 		oact->sa_handler = okact.k_sa_handler;
76 | 		memcpy(&oact->sa_mask, &okact.sa_mask, sizeof(sigset_t));
77 | 		oact->sa_flags = okact.sa_flags;
78 | 		oact->sa_restorer = okact.sa_restorer;
79 | 	}
80 | 
81 | 	return 0;
82 | }
83 | 


--------------------------------------------------------------------------------
/base/syscall.S:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <asm/unistd_64.h>
 3 | 
/*
 * Raw system call stubs. Each stub loads the system call number into %eax,
 * executes the syscall instruction, and returns with whatever the kernel
 * left in %rax. The stubs are bracketed by the base_syscall_start and
 * base_syscall_end symbols.
 */
.section        .note.GNU-stack,"",@progbits
.text
.globl base_syscall_start
base_syscall_start:

/*
 * mmap: the fourth argument is moved from %rcx (C calling convention)
 * into %r10 (kernel system call convention).
 */
.align 16
.globl syscall_mmap
syscall_mmap:
	movl    $__NR_mmap, %eax
	movq    %rcx, %r10
	syscall
	ret

/* mbind: same fourth-argument fix-up as mmap */
.align 16
.globl syscall_mbind
syscall_mbind:
	movl    $__NR_mbind, %eax
	movq    %rcx, %r10
	syscall
	ret

/* ioctl: arguments are passed through untouched */
.align 16
.globl syscall_ioctl
syscall_ioctl:
	movl    $__NR_ioctl, %eax
	syscall
	ret

/* rt_sigreturn: installed as the sa_restorer by base_sigaction() */
.align 16
.globl syscall_rt_sigreturn
syscall_rt_sigreturn:
	movl    $__NR_rt_sigreturn, %eax
        // Invalidate the alternate signal stack entry in this frame
        // so the kernel doesn't change it.
	movl    $4, 24(%rsp)
	syscall
	ret

/* madvise: arguments are passed through untouched */
.align 16
.globl syscall_madvise
syscall_madvise:
	movl    $__NR_madvise, %eax
	syscall
	ret

/* mprotect: arguments are passed through untouched */
.align 16
.globl syscall_mprotect
syscall_mprotect:
	movl    $__NR_mprotect, %eax
	syscall
	ret

.globl base_syscall_end
base_syscall_end:
58 | 


--------------------------------------------------------------------------------
/base/time.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * time.c - timekeeping utilities
 3 |  */
 4 | 
 5 | #include <time.h>
 6 | 
 7 | #include <base/time.h>
 8 | #include <base/log.h>
 9 | #include <base/init.h>
10 | 
11 | #include "init_internal.h"
12 | 
13 | int cycles_per_us __aligned(CACHE_LINE_SIZE);
14 | uint64_t start_tsc;
15 | 
16 | /**
17 |  * __timer_delay_us - spins the CPU for the specified delay
18 |  * @us: the delay in microseconds
19 |  */
20 | void __time_delay_us(uint64_t us)
21 | {
22 | 	uint64_t cycles = us * cycles_per_us;
23 | 	unsigned long start = rdtsc();
24 | 
25 | 	while (rdtsc() - start < cycles)
26 | 		cpu_relax();
27 | }
28 | 
29 | /* derived from DPDK */
30 | static int time_calibrate_tsc(void)
31 | {
32 | 	/* cycles_per_us may be provided in advance */
33 | 	if (cycles_per_us) {
34 | 		start_tsc = rdtsc();
35 | 		return 0;
36 | 	}
37 | 
38 | 	/* TODO: New Intel CPUs report this value in CPUID */
39 | 	struct timespec sleeptime = {.tv_nsec = 5E8 }; /* 1/2 second */
40 | 	struct timespec t_start, t_end;
41 | 
42 | 	cpu_serialize();
43 | 	if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) {
44 | 		uint64_t ns, end, start;
45 | 		double secs;
46 | 
47 | 		start = rdtsc();
48 | 		nanosleep(&sleeptime, NULL);
49 | 		clock_gettime(CLOCK_MONOTONIC_RAW, &t_end);
50 | 		end = rdtscp(NULL);
51 | 		ns = ((t_end.tv_sec - t_start.tv_sec) * 1E9);
52 | 		ns += (t_end.tv_nsec - t_start.tv_nsec);
53 | 
54 | 		secs = (double)ns / 1000;
55 | 		cycles_per_us = (uint64_t)((end - start) / secs);
56 | 		log_info("time: detected %d ticks / us", cycles_per_us);
57 | 
58 | 		/* record the start time of the binary */
59 | 		start_tsc = rdtsc();
60 | 		return 0;
61 | 	}
62 | 
63 | 	return -1;
64 | }
65 | 
/**
 * time_init - global time initialization
 *
 * Calibrates the TSC against the system clock.
 *
 * Returns 0 if successful, otherwise fail.
 */
int time_init(void)
{
	int ret = time_calibrate_tsc();

	return ret;
}
75 | 


--------------------------------------------------------------------------------
/bindings/cc/.gitignore:
--------------------------------------------------------------------------------
1 | test
2 | 


--------------------------------------------------------------------------------
/bindings/cc/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for C++ bindings
 2 | ROOT_PATH=../..
 3 | include $(ROOT_PATH)/build/shared.mk
 4 | 
 5 | # librt++.a - the c++ runtime library
 6 | rt_src = runtime.cc thread.cc net.cc
 7 | rt_obj = $(rt_src:.cc=.o)
 8 | 
 9 | test_src = test.cc
10 | test_obj = $(test_src:.cc=.o)
11 | 
12 | # must be first
13 | all: librt++.a test
14 | 
15 | librt++.a: $(rt_obj)
16 | 	$(AR) rcs $@ $^
17 | 
18 | test: $(test_obj) librt++.a $(RUNTIME_DEPS)
19 | 	$(LDXX) $(LDFLAGS) -o $@ $(test_obj) librt++.a $(RUNTIME_LIBS)
20 | 
21 | # general build rules for all targets
22 | src = $(rt_src) $(test_src)
23 | obj = $(src:.cc=.o)
24 | dep = $(obj:.o=.d)
25 | 
26 | ifneq ($(MAKECMDGOALS),clean)
27 | -include $(dep)   # include all dep files in the makefile
28 | endif
29 | 
30 | # rule to generate a dep file by using the C preprocessor
31 | # (see man cpp for details on the -MM and -MT options)
32 | %.d: %.cc
33 | 	@$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@
34 | %.o: %.cc
35 | 	$(CXX) $(CXXFLAGS) -c $< -o $@
36 | 
37 | .PHONY: clean
38 | clean:
39 | 	rm -f $(obj) $(dep) librt++.a test
40 | 


--------------------------------------------------------------------------------
/bindings/cc/net.cc:
--------------------------------------------------------------------------------
 1 | #include "net.h"
 2 | 
 3 | #include <cstring>
 4 | #include <memory>
 5 | #include <algorithm>
 6 | 
 7 | namespace {
 8 | 
 9 | bool PullIOV(struct iovec **iovp, int *iovcntp, size_t n) {
10 |   struct iovec *iov = *iovp;
11 |   int iovcnt = *iovcntp, i;
12 | 
13 |   for (i = 0; i < iovcnt; ++i) {
14 |     if (n < iov[i].iov_len) {
15 |       iov[i].iov_base = reinterpret_cast<char *>(iov[i].iov_base) + n;
16 |       iov[i].iov_len -= n;
17 |       *iovp = &iov[i];
18 |       *iovcntp -= i;
19 |       return true;
20 |     }
21 |     n -= iov[i].iov_len;
22 |   }
23 | 
24 |   assert(n == 0);
25 |   return false;
26 | }
27 | 
28 | size_t SumIOV(const iovec *iov, int iovcnt) {
29 |   size_t len = 0;
30 |   for (int i = 0; i < iovcnt; ++i) len += iov[i].iov_len;
31 |   return len;
32 | }
33 | 
34 | }  // namespace
35 | 
36 | namespace rt {
37 | 
38 | ssize_t TcpConn::WritevFullRaw(const iovec *iov, int iovcnt) {
39 |   // first try to send without copying the vector
40 |   ssize_t n = tcp_writev(c_, iov, iovcnt);
41 |   if (n < 0) return n;
42 |   assert(n > 0);
43 | 
44 |   // sum total length and check if everything was transfered
45 |   size_t total = SumIOV(iov, iovcnt);
46 |   if (static_cast<size_t>(n) == total) return n;
47 | 
48 |   // partial transfer occurred, send the rest
49 |   size_t len = n;
50 |   std::unique_ptr<iovec[]> v = std::unique_ptr<iovec[]>{new iovec[iovcnt]};
51 |   iovec *iovp = v.get();
52 |   std::copy_n(iov, iovcnt, iovp);
53 |   while (PullIOV(&iovp, &iovcnt, n)) {
54 |     n = tcp_writev(c_, iovp, iovcnt);
55 |     if (n < 0) return n;
56 |     assert(n > 0);
57 |     len += n;
58 |   }
59 | 
60 |   assert(len == total);
61 |   return len;
62 | }
63 | 
64 | ssize_t TcpConn::ReadvFullRaw(const iovec *iov, int iovcnt) {
65 |   // first try to receive without copying the vector
66 |   ssize_t n = tcp_readv(c_, iov, iovcnt);
67 |   if (n <= 0) return n;
68 | 
69 |   // sum total length and check if everything was transfered
70 |   size_t total = SumIOV(iov, iovcnt);
71 |   if (static_cast<size_t>(n) == total) return n;
72 | 
73 |   // partial transfer occurred, receive the rest
74 |   size_t len = n;
75 |   std::unique_ptr<iovec[]> v = std::unique_ptr<iovec[]>{new iovec[iovcnt]};
76 |   iovec *iovp = v.get();
77 |   std::copy_n(iov, iovcnt, iovp);
78 |   while (PullIOV(&iovp, &iovcnt, n)) {
79 |     n = tcp_readv(c_, iovp, iovcnt);
80 |     if (n <= 0) return n;
81 |     len += n;
82 |   }
83 | 
84 |   assert(len == total);
85 |   return len;
86 | }
87 | 
88 | }  // namespace rt
89 | 


--------------------------------------------------------------------------------
/bindings/cc/runtime.cc:
--------------------------------------------------------------------------------
 1 | #include "runtime.h"
 2 | 
 3 | #include "thread.h"
 4 | 
 5 | namespace rt {
 6 | 
 7 | // initializes the runtime
 8 | int RuntimeInit(std::string cfg_path, std::function<void()> main_func) {
 9 |   auto *func_copy = new std::function<void()>(main_func);
10 |   return runtime_init(cfg_path.c_str(), thread_internal::ThreadTrampoline,
11 |                       reinterpret_cast<void *>(func_copy));
12 | }
13 | 
14 | }  // namespace rt
15 | 


--------------------------------------------------------------------------------
/bindings/cc/runtime.h:
--------------------------------------------------------------------------------
 1 | // the main header for Shenango's runtime
 2 | 
 3 | #pragma once
 4 | 
 5 | extern "C" {
 6 | #include <runtime/runtime.h>
 7 | }
 8 | 
 9 | #include <functional>
10 | #include <string>
11 | 
12 | namespace rt {
13 | 
14 | // The highest number of cores supported.
15 | constexpr unsigned int kCoreLimit = NCPU;
16 | 
17 | // Initializes the runtime. If successful, calls @main_func and does not return.
18 | int RuntimeInit(std::string cfg_path, std::function<void()> main_func);
19 | 
20 | // Gets the queueing delay of runqueue (thread queue) + packet queue
21 | inline uint64_t RuntimeQueueUS() { return runtime_queue_us(); }
22 | 
23 | // Gets an estimate of the instantanious load as measured by the IOKernel.
24 | inline float RuntimeLoad() { return runtime_load(); }
25 | 
26 | // Gets the current number of active cores
27 | inline unsigned int RuntimeActiveCores() { return runtime_active_cores(); }
28 | 
29 | // Gets the maximum number of cores the runtime could run on.
30 | inline unsigned int RuntimeMaxCores() { return runtime_max_cores(); }
31 | 
32 | // Gets the guaranteed number of cores the runtime will at least get.
33 | inline unsigned int RuntimeGuaranteedCores() {
34 |   return runtime_guaranteed_cores();
35 | }
36 | 
37 | };  // namespace rt
38 | 


--------------------------------------------------------------------------------
/bindings/cc/storage.h:
--------------------------------------------------------------------------------
 1 | // storage.h - support for flash storage
 2 | 
 3 | #pragma once
 4 | 
 5 | extern "C" {
 6 | #include <runtime/storage.h>
 7 | }
 8 | 
 9 | // TODO: this should be per-device.
10 | class Storage {
11 |  public:
12 |   // Write contiguous storage blocks.
13 |   static int Write(const void *src, uint64_t lba, uint32_t lba_count) {
14 |     return storage_write(src, lba, lba_count);
15 |   }
16 | 
17 |   // Read contiguous storage blocks.
18 |   static int Read(void *dst, uint64_t lba, uint32_t lba_count) {
19 |     return storage_read(dst, lba, lba_count);
20 |   }
21 | 
22 |   // Returns the size of each block.
23 |   static uint32_t get_block_size() { return storage_block_size(); }
24 | 
25 |   // Returns the capacity of the device in blocks.
26 |   static uint64_t get_num_blocks() { return storage_num_blocks(); }
27 | };
28 | 


--------------------------------------------------------------------------------
/bindings/cc/test.cc:
--------------------------------------------------------------------------------
 1 | extern "C" {
 2 | #include <base/log.h>
 3 | #include <base/stddef.h>
 4 | }
 5 | 
 6 | #include <string>
 7 | 
 8 | #include "runtime.h"
 9 | #include "thread.h"
10 | #include "timer.h"
11 | 
12 | namespace {
13 | 
14 | constexpr int kTestValue = 10;
15 | 
16 | void foo(int arg) {
17 |   if (arg != kTestValue) BUG();
18 | }
19 | 
20 | void MainHandler() {
21 |   std::string str = "captured!";
22 |   int i = kTestValue;
23 |   int j = kTestValue;
24 | 
25 |   rt::Spawn([=] {
26 |     log_info("hello from ThreadSpawn()! '%s'", str.c_str());
27 |     foo(i);
28 |   });
29 | 
30 |   rt::Spawn([&] {
31 |     log_info("hello from ThreadSpawn()! '%s'", str.c_str());
32 |     foo(i);
33 |     j *= 2;
34 |   });
35 | 
36 |   rt::Yield();
37 |   if (j != kTestValue * 2) BUG();
38 | 
39 |   rt::Sleep(1 * rt::kMilliseconds);
40 | 
41 |   auto th = rt::Thread([&] {
42 |     log_info("hello from rt::Thread! '%s'", str.c_str());
43 |     foo(i);
44 |   });
45 |   th.Join();
46 | }
47 | 
48 | }  // anonymous namespace
49 | 
50 | int main(int argc, char *argv[]) {
51 |   int ret;
52 | 
53 |   if (argc < 2) {
54 |     printf("arg must be config file\n");
55 |     return -EINVAL;
56 |   }
57 | 
58 |   ret = rt::RuntimeInit(argv[1], MainHandler);
59 |   if (ret) {
60 |     log_err("failed to start runtime");
61 |     return ret;
62 |   }
63 |   return 0;
64 | }
65 | 


--------------------------------------------------------------------------------
/bindings/cc/timer.h:
--------------------------------------------------------------------------------
 1 | // timer.h - support for timers
 2 | 
 3 | #pragma once
 4 | 
 5 | extern "C" {
 6 | #include <base/time.h>
 7 | #include <runtime/timer.h>
 8 | }
 9 | 
10 | namespace rt {
11 | 
12 | static constexpr uint64_t kMilliseconds = 1000;
13 | static constexpr uint64_t kSeconds = 1000000;
14 | 
15 | // Gets the current number of microseconds since the launch of the runtime.
16 | inline uint64_t MicroTime() { return microtime(); }
17 | 
18 | // Busy-spins for a microsecond duration.
19 | inline void Delay(uint64_t us) { delay_us(us); }
20 | 
21 | // Sleeps until a microsecond deadline.
22 | inline void SleepUntil(uint64_t deadline_us) { timer_sleep_until(deadline_us); }
23 | 
24 | // Sleeps for a microsecond duration.
25 | inline void Sleep(uint64_t duration_us) { timer_sleep(duration_us); }
26 | 
27 | }  // namespace rt
28 | 


--------------------------------------------------------------------------------
/bindings/rust/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | 


--------------------------------------------------------------------------------
/bindings/rust/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "shenango"
 3 | version = "0.1.1"
 4 | authors = ["The Shenango Developers"]
 5 | edition = "2021"
 6 | 
 7 | [dependencies]
 8 | libc = "0.2"
 9 | byteorder = "1.5"
10 | 
11 | [build-dependencies]
12 | bindgen = "0.69.1"
13 | build-deps = "0.1.4"
14 | proc-macro2 = "1.0.66"
15 | anyhow = "1.0.86"
16 | 
17 | [[bin]]
18 | name = "runtime_threads"
19 | path = "src/test_runtime_threads.rs"
20 | 
21 | [[bin]]
22 | name = "runtime_joinhandle"
23 | path = "src/test_runtime_joinhandle.rs"
24 | 
25 | [[bin]]
26 | name = "hello"
27 | path = "src/test_hello.rs"
28 | 
29 | [[bin]]
30 | name = "smalloc"
31 | path = "src/test_smalloc.rs"
32 | 


--------------------------------------------------------------------------------
/bindings/rust/rust-toolchain.toml:
--------------------------------------------------------------------------------
[toolchain]
channel = "nightly"
# rustup expects complete target triples, including the environment part
targets = [ "x86_64-unknown-linux-gnu" ]
4 | 


--------------------------------------------------------------------------------
/bindings/rust/shenango.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdbool.h>
 3 | 
 4 | #include <base/assert.h>
 5 | #include <base/init.h>
 6 | #include <base/lock.h>
 7 | #include <base/log.h>
 8 | #include <base/slab.h>
 9 | #include <base/tcache.h>
10 | 
11 | #include <runtime/preempt.h>
12 | #include <runtime/runtime.h>
13 | #include <runtime/smalloc.h>
14 | #include <runtime/storage.h>
15 | #include <runtime/sync.h>
16 | #include <runtime/tcp.h>
17 | #include <runtime/thread.h>
18 | #include <runtime/timer.h>
19 | #include <runtime/udp.h>
20 | 


--------------------------------------------------------------------------------
/bindings/rust/src/asm.rs:
--------------------------------------------------------------------------------
 1 | #[inline]
 2 | pub fn cpu_relax() {
 3 |     unsafe { core::arch::x86_64::_mm_pause() }
 4 | }
 5 | 
 6 | #[inline]
 7 | pub fn cpu_serialize() {
 8 |     unsafe {
 9 |         core::arch::x86_64::__cpuid(0);
10 |     }
11 | }
12 | 
/// Returns the value of the `_rdtsc` intrinsic (the timestamp counter).
#[inline]
pub fn rdtsc() -> u64 {
    use core::arch::x86_64::_rdtsc;

    let tsc = unsafe { _rdtsc() };
    tsc
}
17 | 
/// Returns the timestamp counter together with the auxiliary value that the
/// `__rdtscp` intrinsic writes through its out-pointer.
#[inline]
pub fn rdtscp() -> (u64, u32) {
    use core::arch::x86_64::__rdtscp;

    let mut aux = 0u32;
    let aux_ptr: *mut u32 = &mut aux;
    let tsc = unsafe { __rdtscp(aux_ptr) };
    (tsc, aux)
}
24 | 


--------------------------------------------------------------------------------
/bindings/rust/src/ffi.rs:
--------------------------------------------------------------------------------
1 | #![allow(non_upper_case_globals)]
2 | #![allow(non_camel_case_types)]
3 | #![allow(non_snake_case)]
4 | 
5 | include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
6 | 


--------------------------------------------------------------------------------
/bindings/rust/src/storage.rs:
--------------------------------------------------------------------------------
 1 | use std::io::{Error, ErrorKind, Result};
 2 | 
 3 | use super::*;
 4 | 
// Globals defined outside of Rust. The accessors below treat a value of
// zero as "storage not enabled".
extern "C" {
    // Size of one storage block.
    #[link_name = "block_size"]
    static block_size: ffi::u_int32_t;

    // Number of blocks on the device.
    #[link_name = "num_blocks"]
    static num_blocks: ffi::u_int64_t;
}
12 | 
13 | pub fn storage_block_size() -> Result<usize> {
14 |     let bsize = unsafe { block_size };
15 |     if bsize == 0 {
16 |         return Err(Error::new(ErrorKind::Other, "storage not enabled"));
17 |     }
18 |     Ok(bsize as usize)
19 | }
20 | 
21 | pub fn storage_num_blocks() -> Result<usize> {
22 |     let nblocks = unsafe { num_blocks };
23 |     if nblocks == 0 {
24 |         return Err(Error::new(ErrorKind::Other, "storage not enabled"));
25 |     }
26 |     Ok(nblocks as usize)
27 | }
28 | 
29 | pub fn storage_read(buf: &mut [u8], lba: u64) -> Result<usize> {
30 |     let bsize = storage_block_size()?;
31 |     let nblocks = buf.len() / bsize;
32 |     let res = unsafe { ffi::storage_read(buf.as_mut_ptr() as *mut c_void, lba, nblocks as u32) };
33 |     if res < 0 {
34 |         Err(Error::from_raw_os_error(-res))
35 |     } else {
36 |         Ok((nblocks * bsize) as usize)
37 |     }
38 | }
39 | 
40 | pub fn storage_write(buf: &[u8], lba: u64) -> Result<usize> {
41 |     let bsize = storage_block_size()?;
42 |     let nblocks = buf.len() / bsize;
43 |     let res = unsafe { ffi::storage_write(buf.as_ptr() as *const c_void, lba, nblocks as u32) };
44 |     if res < 0 {
45 |         Err(Error::from_raw_os_error(-res))
46 |     } else {
47 |         Ok((nblocks * bsize) as usize)
48 |     }
49 | }
50 | 


--------------------------------------------------------------------------------
/bindings/rust/src/test_hello.rs:
--------------------------------------------------------------------------------
1 | extern crate shenango;
2 | 
// Smoke test: performs the global base-library initialization followed by
// the per-thread initialization, panicking if either one fails.
fn main() {
    shenango::base_init().unwrap();
    shenango::base_init_thread().unwrap();
}
7 | 


--------------------------------------------------------------------------------
/bindings/rust/src/test_runtime_joinhandle.rs:
--------------------------------------------------------------------------------
 1 | extern crate shenango;
 2 | 
 3 | use shenango::WaitGroup;
 4 | use std::sync::Arc;
 5 | 
 6 | const N: usize = 50000;
 7 | const NCORES: usize = 3;
 8 | 
 9 | fn main_handler() {
10 |     println!("started main_handler() thread");
11 |     println!("creating threads with 1us of fake work.");
12 | 
13 |     let wg = Arc::new(WaitGroup::new());
14 |     wg.add(NCORES as i32);
15 | 
16 |     let start_us = shenango::microtime();
17 | 
18 |     let mut join_handles = Vec::new();
19 |     for _ in 0..NCORES {
20 |         join_handles.push(shenango::thread::spawn(|| {
21 |             for _ in 0..N {
22 |                 shenango::thread::spawn(|| shenango::delay_us(1))
23 |                     .join()
24 |                     .unwrap();
25 |             }
26 |         }));
27 |     }
28 | 
29 |     for j in join_handles {
30 |         j.join().unwrap();
31 |     }
32 | 
33 |     let threads_per_second =
34 |         (NCORES * N) as f64 / ((shenango::microtime() - start_us) as f64 * 0.000001);
35 |     println!(
36 |         "spawned {} threads / second, efficiency {}",
37 |         threads_per_second,
38 |         0.000001 * threads_per_second / NCORES as f64
39 |     );
40 | }
41 | 
42 | fn main() {
43 |     let args: Vec<_> = ::std::env::args().collect();
44 |     assert!(args.len() >= 2, "arg must be config file");
45 |     shenango::runtime_init(args[1].clone(), main_handler).unwrap();
46 | }
47 | 


--------------------------------------------------------------------------------
/bindings/rust/src/test_runtime_threads.rs:
--------------------------------------------------------------------------------
 1 | extern crate shenango;
 2 | 
 3 | use shenango::WaitGroup;
 4 | use std::sync::Arc;
 5 | 
 6 | const N: usize = 1000000;
 7 | const NCORES: usize = 4;
 8 | 
 9 | fn leaf_handler(wg_parent: Arc<WaitGroup>) {
10 |     shenango::delay_us(1); // the "1us of fake work" announced by main_handler()
11 |     wg_parent.done(); // report completion to the WaitGroup of the spawning work_handler()
12 | }
13 | 
14 | fn work_handler(wg_parent: Arc<WaitGroup>) {
15 |     let wg = Arc::new(WaitGroup::new());
16 |     wg.add(N as i32); // one count per leaf thread spawned below
17 |     for _ in 0..N {
18 |         let wg2 = wg.clone();
19 |         shenango::thread::spawn_detached(move || leaf_handler(wg2));
20 |         shenango::thread::thread_yield();
21 |     }
22 |     // Wait for all N leaves to call done(), then report to main_handler()'s WaitGroup.
23 |     wg.wait();
24 |     wg_parent.done();
25 | }
26 | 
27 | fn main_handler() {
28 |     println!("started main_handler() thread");
29 |     println!("creating threads with 1us of fake work.");
30 |     // One WaitGroup count per worker; every work_handler() calls done() exactly once.
31 |     let wg = Arc::new(WaitGroup::new());
32 |     wg.add(NCORES as i32);
33 | 
34 |     let start_us = shenango::microtime();
35 |     for _ in 0..NCORES {
36 |         let wg2 = wg.clone();
37 |         shenango::thread::spawn_detached(move || work_handler(wg2));
38 |     }
39 | 
40 |     wg.wait();
41 |     // efficiency = (threads/s * 1us of delay_us per leaf) / NCORES; must not depend on N.
42 |     let threads_per_second =
43 |         (NCORES * N) as f64 / ((shenango::microtime() - start_us) as f64 * 0.000001);
44 |     println!(
45 |         "spawned {} threads / second, efficiency {}",
46 |         threads_per_second,
47 |         0.000001 * threads_per_second / NCORES as f64
48 |     );
49 | }
50 | 
51 | fn main() {
52 |     let args: Vec<_> = ::std::env::args().collect();
53 |     assert!(args.len() >= 2, "arg must be config file");
54 |     shenango::runtime_init(args[1].clone(), main_handler).unwrap();
55 | }
56 | 


--------------------------------------------------------------------------------
/breakwater/Makefile:
--------------------------------------------------------------------------------
 1 | ROOT_PATH=..
 2 | include $(ROOT_PATH)/build/shared.mk
 3 | 
 4 | CFLAGS += -I$(ROOT_PATH)/breakwater/inc
 5 | 
 6 | # breakwater - RPC layer with server overload control
 7 | breakwater_src = $(wildcard src/*.c)
 8 | breakwater_obj = $(breakwater_src:.c=.o)
 9 | 
10 | all: libbw.a
11 | 
12 | libbw.a: $(breakwater_obj)
13 | 	$(AR) rcs $@ $^
14 | 
15 | # general build rules for all targets
16 | src = $(breakwater_src)
17 | obj = $(src:.c=.o)
18 | dep = $(obj:.o=.d)
19 | 
20 | ifneq ($(MAKECMDGOALS),clean)
21 | -include $(dep)		# include all dep files in the makefile
22 | endif
23 | 
24 | # rule to generate a dep file by using the C preprocessor
25 | %.d: %.c
26 | 	@$(CC) $(CFLAGS) $< -MM -MT $(@:.d=.o) >$@
27 | %.o: %.c
28 | 	@$(CC) $(CFLAGS) -c $< -o $@
29 | 
30 | .PHONY: clean
31 | clean:
32 | 	rm -f $(obj) $(dep) libbw.a
33 | 


--------------------------------------------------------------------------------
/breakwater/apps/netbench/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for netbench
 2 | ROOT_PATH=../../..
 3 | include $(ROOT_PATH)/build/shared.mk
 4 | 
 5 | BW_LIBS = $(ROOT_PATH)/breakwater/libbw.a
 6 | 
 7 | lib_src = synthetic_worker.cc util.cc
 8 | lib_obj = $(lib_src:.cc=.o)
 9 | 
10 | netbench_src = netbench.cc
11 | netbench_obj = $(netbench_src:.cc=.o)
12 | 
13 | librt_libs = $(ROOT_PATH)/bindings/cc/librt++.a
14 | libbw_libs = $(ROOT_PATH)/breakwater/bindings/cc/libbw++.a
15 | INC += -I$(ROOT_PATH)/breakwater/inc
16 | INC += -I$(ROOT_PATH)/bindings
17 | INC += -I$(ROOT_PATH)/breakwater/bindings/cc/inc
18 | 
19 | RUNTIME_LIBS := $(RUNTIME_LIBS) $(BW_LIBS) -lnuma
20 | 
21 | # must be first
22 | all: netbench
23 | 
24 | netbench: $(lib_obj) $(netbench_obj) $(librt_libs) $(libbw_libs) $(RUNTIME_DEPS)
25 | 	$(LDXX) -o $@ $(LDFLAGS) $(lib_obj) $(netbench_obj) \
26 | 	$(librt_libs) $(libbw_libs) $(RUNTIME_LIBS)
27 | 
28 | # general build rules for all targets
29 | src = $(lib_src) $(netbench_src)
30 | obj = $(src:.cc=.o)
31 | dep = $(obj:.o=.d)
32 | 
33 | ifneq ($(MAKECMDGOALS),clean)
34 | -include $(dep)   # include all dep files in the makefile
35 | endif
36 | 
37 | # rule to generate a dep file by using the C preprocessor
38 | # (see man cpp for details on the -MM and -MT options)
39 | %.d: %.cc
40 | 	@$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@
41 | %.o: %.cc
42 | 	$(CXX) $(CXXFLAGS) -c $< -o $@
43 | 
44 | .PHONY: clean
45 | clean:
46 | 	rm -f $(obj) $(dep) netbench
47 | 


--------------------------------------------------------------------------------
/breakwater/apps/netbench/client.config:
--------------------------------------------------------------------------------
1 | # an example runtime config file
2 | host_addr 192.168.1.107
3 | host_netmask 255.255.255.0
4 | host_gateway 192.168.1.1
5 | runtime_kthreads 16
6 | runtime_spinning_kthreads 16
7 | enable_directpath 1
8 | 


--------------------------------------------------------------------------------
/breakwater/apps/netbench/server.config:
--------------------------------------------------------------------------------
1 | # an example runtime config file
2 | host_addr 192.168.1.103
3 | host_netmask 255.255.255.0
4 | host_gateway 192.168.1.1
5 | runtime_kthreads 10
6 | enable_directpath 1
7 | 


--------------------------------------------------------------------------------
/breakwater/apps/netbench/util.cc:
--------------------------------------------------------------------------------
 1 | #include "util.h"
 2 | 
 3 | // Splits |text| at each |sep|; k separators always produce k + 1 tokens.
 4 | std::vector<std::string> split(const std::string &text, char sep) {
 5 |   std::vector<std::string> parts;
 6 |   std::string::size_type from = 0, at;
 7 |   for (; (at = text.find(sep, from)) != std::string::npos; from = at + 1)
 8 |     parts.emplace_back(text, from, at - from);
 9 |   // Whatever follows the last separator (possibly empty) is the final token.
10 |   parts.emplace_back(text, from);
11 |   return parts;
12 | }
13 | 


--------------------------------------------------------------------------------
/breakwater/apps/netbench/util.h:
--------------------------------------------------------------------------------
 1 | // util.h - a collection of shared utilities
 2 | 
 3 | #pragma once
 4 | 
 5 | #include "cc/timer.h"
 6 | 
 7 | #include <chrono>
 8 | #include <string>
 9 | #include <tuple>
10 | #include <vector>
11 | 
12 | using namespace std::chrono;
13 | 
14 | struct work_unit {
15 |   double start_us, work_us, duration_us;
16 |   int cpu;
17 | };
18 | 
19 | template <class Arrival, class Service>
20 | std::vector<work_unit> GenerateWork(Arrival a, Service s, double cur_us,
21 |                                     double last_us, int cpu) {
22 |   std::vector<work_unit> w;  // units in generation order, all tagged with |cpu|
23 |   while (cur_us < last_us) {  // checked before advancing: last unit may start >= last_us
24 |     cur_us += a();  // a() supplies the increment to the next start time
25 |     w.emplace_back(work_unit{cur_us, s(), 0, cpu});  // {start_us, work_us, duration_us=0, cpu}
26 |   }
27 |   return w;
28 | }
29 | 
30 | template <class Arrival, class Service>
31 | std::vector<work_unit> GenerateWork(Arrival a, Service *s, double cur_us,
32 |                                     double last_us, int cpu) {
33 |   std::vector<work_unit> w;  // overload taking the work_us generator by pointer
34 |   while (cur_us < last_us) {  // checked before advancing: last unit may start >= last_us
35 |     cur_us += a();  // a() supplies the increment to the next start time
36 |     w.emplace_back(work_unit{cur_us, (*s)(), 0, cpu});  // generator invoked as (*s)()
37 |   }
38 |   return w;
39 | }
40 | 
41 | std::vector<std::string> split(const std::string &text, char sep);
42 | 
43 | class Timer {  // stopwatch over steady_clock that reports microseconds as a double
44 |  public:
45 |   using micro = duration<double, std::micro>;
46 | 
47 |   Timer() {
48 |     barrier();
49 |     start_ts_ = steady_clock::now();
50 |     barrier();
51 |   }
52 |   ~Timer(){};
53 | 
54 |   // Resets the timer start time to now.
55 |   void Reset() {
56 |     barrier();
57 |     start_ts_ = steady_clock::now();
58 |     barrier();
59 |   }
60 | 
61 |   // Returns the microseconds elapsed since construction or the last Reset().
62 |   double Elapsed() {
63 |     barrier();
64 |     auto now = steady_clock::now();
65 |     barrier();
66 |     return duration_cast<micro>(now - start_ts_).count();
67 |   }
68 | 
69 |   // Busy spin until Elapsed() reaches the deadline (microseconds since start).
70 |   void SpinUntil(double deadline) {
71 |     while (Elapsed() < deadline) cpu_relax();
72 |   }
73 | 
74 |   // Sleep until the deadline (microseconds since start); no-op if already past.
75 |   void SleepUntil(double deadline) {
76 |     double diff = deadline - Elapsed();
77 |     if (diff <= 0) return;
78 |     rt::Sleep(static_cast<uint64_t>(diff));  // fractional microseconds are truncated
79 |   }
80 | 
81 |  private:
82 |   time_point<steady_clock> start_ts_;
83 | };
84 | 


--------------------------------------------------------------------------------
/breakwater/bindings/cc/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for C++ bindings
 2 | ROOT_PATH=../../..
 3 | include $(ROOT_PATH)/build/shared.mk
 4 | 
 5 | CXXFLAGS += -I$(ROOT_PATH)/breakwater/inc
 6 | CXXFLAGS += -I$(ROOT_PATH)/breakwater/bindings/cc/inc
 7 | 
 8 | # libbw++.a - the c++ breakwater library
 9 | bw_src = rpc++.cc
10 | bw_obj = $(bw_src:.cc=.o)
11 | 
12 | all: libbw++.a
13 | 
14 | libbw++.a: $(bw_obj)
15 | 	$(AR) rcs $@ $^
16 | 
17 | #general build rules for all targets
18 | src = $(bw_src)
19 | obj = $(src:.cc=.o)
20 | dep = $(obj:.o=.d)
21 | 
22 | ifneq ($(MAKECMDGOALS),clean)
23 | -include $(dep)		# include all dep files in the makefile
24 | endif
25 | 
26 | # rule to generate a dep file by using the C preprocessor
27 | %.d: %.cc
28 | 	@$(CXX) $(CXXFLAGS) $< -MM -MT $(@:.d=.o) >$@
29 | %.o: %.cc
30 | 	$(CXX) $(CXXFLAGS) -c $< -o $@
31 | 
32 | .PHONY: clean
33 | clean:
34 | 	rm -f $(obj) $(dep) libbw++.a
35 | 


--------------------------------------------------------------------------------
/breakwater/bindings/cc/inc/breakwater/rpc++.h:
--------------------------------------------------------------------------------
 1 | // rpc++.h - C++ bindings for remote procedure calls (RPCs)
 2 | 
 3 | #pragma once
 4 | 
 5 | extern "C" {
 6 | #include <base/stddef.h>
 7 | #include <breakwater/rpc.h>
 8 | }
 9 | 
10 | #include <functional>
11 | 
12 | namespace rpc {
13 | 
14 | class RpcClient {
15 |  public:
16 |   // The maximum size of an RPC request payload.
17 |   static constexpr size_t kMaxPayloadSize = SRPC_BUF_SIZE;
18 | 
19 |   // Disable copy (declaring these also suppresses the implicit move operations).
20 |   RpcClient(const RpcClient&) = delete;
21 |   RpcClient& operator=(const RpcClient&) = delete;
22 | 
23 |   // Creates an RPC session.
24 |   static RpcClient *Dial(netaddr raddr, int id);
25 | 
26 |   // Sends an RPC request.
27 |   ssize_t Send(const void *buf, size_t len, int hash);
28 | 
29 |   // Receives an RPC response.
30 |   ssize_t Recv(void *buf, size_t len, uint64_t *latency);
31 | 
32 |   uint32_t WinAvail();
33 | 
34 |   void StatClear();
35 | 
36 |   uint64_t StatWinuRx();
37 | 
38 |   uint64_t StatWinuTx();
39 | 
40 |   uint64_t StatRespRx();
41 | 
42 |   uint64_t StatReqTx();
43 | 
44 |   uint64_t StatWinExpired();
45 | 
46 |   uint64_t StatReqDropped();
47 | 
48 |   // Shuts down the RPC connection.
49 |   int Shutdown(int how);
50 |   // Aborts the RPC connection.
51 |   void Abort();
52 | 
53 |   void Close();
54 | 
55 |  private:
56 |   RpcClient(struct crpc_session *s) : s_(s) { }  // instances come from Dial()
57 | 
58 |   // The client session object.
59 |   struct crpc_session *s_;
60 | };
61 | 
62 | // Enables the RPC server, listening for new sessions.
63 | // Can only be called once.
64 | int RpcServerEnable(std::function<void(struct srpc_ctx *)> f);
65 | 
66 | uint64_t RpcServerStatWinuRx();
67 | uint64_t RpcServerStatWinuTx();
68 | uint64_t RpcServerStatWinTx();
69 | uint64_t RpcServerStatReqRx();
70 | uint64_t RpcServerStatReqDropped();
71 | uint64_t RpcServerStatRespTx();
72 | } // namespace rpc
73 | 


--------------------------------------------------------------------------------
/breakwater/inc/breakwater/breakwater.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * breakwater.h - breakwater implementation for RPC layer
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | #include <base/atomic.h>
 9 | #include <runtime/sync.h>
10 | 
11 | #include "rpc.h"
12 | 
13 | /* for RPC server */
14 | 
15 | struct sbw_ctx {
16 | 	struct srpc_ctx		cmn;
17 | 	uint64_t		ts_sent;
18 | 	bool			drop;
19 | };
20 | 
21 | /* for RPC client */
22 | struct cbw_session {
23 | 	struct crpc_session	cmn;
24 | 	uint64_t		id;
25 | 	uint64_t		req_id;
26 | 	mutex_t			lock;
27 | 	waitgroup_t		timer_waiter;
28 | 	bool			waiting_winupdate;
29 | 	uint32_t		win_avail;
30 | 	uint32_t		win_used;
31 | 	bool			running;
32 | 	bool			demand_sync;
33 | 	condvar_t		timer_cv;
34 | 	bool			init;
35 | 
36 | 	/* a queue of pending RPC requests */
37 | 	uint32_t		head;
38 | 	uint32_t		tail;
39 | 	struct crpc_ctx		*qreq[CRPC_QLEN];
40 | 
41 | 	/* client-side stats */
42 | 	uint64_t		winu_rx_;
43 | 	uint64_t		winu_tx_;
44 | 	uint64_t		resp_rx_;
45 | 	uint64_t		req_tx_;
46 | 	uint64_t		win_expired_;
47 | 	uint64_t		req_dropped_;
48 | };
49 | 


--------------------------------------------------------------------------------
/breakwater/inc/breakwater/dagor.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * dagor.h - DAGOR implementation for RPC layer
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | #include <base/atomic.h>
 9 | #include <runtime/sync.h>
10 | 
11 | #include "rpc.h"
12 | 
13 | /* for RPC server */
14 | 
15 | struct sdg_ctx {
16 | 	struct srpc_ctx		cmn;
17 | 	uint64_t		ts_sent;
18 | 	bool			drop;
19 | };
20 | 
21 | /* for RPC client */
22 | 
23 | struct cdg_ctx {
24 | 	struct crpc_ctx		cmn;
25 | 	int			prio;
26 | };
27 | 
28 | struct cdg_session {
29 | 	struct crpc_session	cmn;
30 | 	uint64_t		id;
31 | 	uint64_t		req_id;
32 | 	int			local_prio;
33 | 	mutex_t			lock;
34 | 	bool			running;
35 | 	condvar_t		sender_cv;
36 | 	waitgroup_t		sender_waiter;
37 | 
38 | 	/* a queue of pending RPC requests */
39 | 	uint32_t		head;
40 | 	uint32_t		tail;
41 | 	struct cdg_ctx		*qreq[CRPC_QLEN];
42 | 
43 | 	/* client-side stats */
44 | 	uint64_t		winu_rx_;
45 | 	uint64_t		winu_tx_;
46 | 	uint64_t		resp_rx_;
47 | 	uint64_t		req_tx_;
48 | 	uint64_t		win_expired_;
49 | 	uint64_t		req_dropped_;
50 | };
51 | 


--------------------------------------------------------------------------------
/breakwater/inc/breakwater/nocontrol.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * nocontrol.h - implementation without server overload control
 3 |  * for RPC layer
 4 |  */
 5 | 
 6 | #pragma once
 7 | 
 8 | #include <base/types.h>
 9 | #include <runtime/sync.h>
10 | 
11 | #include "rpc.h"
12 | 
13 | /* for RPC server */
14 | 
15 | struct snc_ctx {
16 | 	struct srpc_ctx		cmn;
17 | 	uint64_t		ts;
18 | };
19 | 
20 | /* for RPC client */
21 | struct cnc_session {
22 | 	struct crpc_session	cmn;
23 | 	uint64_t		id;
24 | 	uint64_t		req_id;
25 | 	mutex_t			lock;
26 | 	bool			running;
27 | 	condvar_t		sender_cv;
28 | 	waitgroup_t		sender_waiter;
29 | 
30 | 	/* a queue of pending RPC requests */
31 | 	uint32_t		head;
32 | 	uint32_t		tail;
33 | 	struct crpc_ctx		*qreq[CRPC_QLEN];
34 | 
35 | 	/* client-side stats */
36 | 	uint64_t		winu_rx_;
37 | 	uint64_t		winu_tx_;
38 | 	uint64_t		resp_rx_;
39 | 	uint64_t		req_tx_;
40 | 	uint64_t		win_expired_;
41 | 	uint64_t		req_dropped_;
42 | };
43 | 


--------------------------------------------------------------------------------
/breakwater/inc/breakwater/seda.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * seda.h - SEDA implementation for RPC layer
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | #include <base/atomic.h>
 9 | #include <runtime/sync.h>
10 | 
11 | #include "rpc.h"
12 | 
13 | /* for RPC server */
14 | 
15 | struct ssd_ctx {
16 | 	struct srpc_ctx		cmn;
17 | 	uint64_t		ts;
18 | };
19 | 
20 | /* for RPC client */
21 | #define SEDA_NREQ 100
22 | 
23 | struct csd_session {
24 | 	struct crpc_session	cmn;
25 | 	uint64_t		id;
26 | 	uint64_t		req_id;
27 | 	mutex_t			lock;
28 | 	condvar_t		timer_cv;
29 | 	waitgroup_t		timer_waiter;
30 | 	bool			running;
31 | 	condvar_t		sender_cv;
32 | 	waitgroup_t		sender_waiter;
33 | 
34 | 	/* token bucket for rate limiting */
35 | 	double			tb_token;
36 | 	double			tb_refresh_rate;
37 | 	uint64_t		tb_last_refresh;
38 | 
39 | 	int32_t			res_ts[SEDA_NREQ];
40 | 	int			res_idx;
41 | 	double			cur;
42 | 	uint64_t		seda_last_update;
43 | 
44 | 	/* a queue of pending RPC requests */
45 | 	uint32_t		head;
46 | 	uint32_t		tail;
47 | 	struct crpc_ctx		*qreq[CRPC_QLEN];
48 | 
49 | 	/* client-side stats */
50 | 	uint64_t		winu_rx_;
51 | 	uint64_t		winu_tx_;
52 | 	uint64_t		resp_rx_;
53 | 	uint64_t		req_tx_;
54 | 	uint64_t		win_expired_;
55 | 	uint64_t		req_dropped_;
56 | };
57 | 


--------------------------------------------------------------------------------
/breakwater/scripts/setup_machine.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # run with sudo
 3 | 
 4 | pushd ..
 5 | 
 6 | # Build ksched.ko (in a subshell, so a failed build cannot leave us inside ksched/)
 7 | (cd ksched && make clean && make)
 8 | 
 9 | # Shenango setup
10 | ./scripts/setup_machine.sh
11 | 
12 | # turn on cstate
13 | killall cstate
14 | cd scripts
15 | gcc cstate.c -o cstate
16 | ./cstate 0 &
17 | cd ..
18 | 
19 | # Disable frequency scaling
20 | echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
21 | 
22 | # Disable turbo boost
23 | echo 1 | tee /sys/devices/system/cpu/intel_pstate/no_turbo
24 | 
25 | popd
26 | 


--------------------------------------------------------------------------------
/breakwater/src/bw_config.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * bw_config.h - Breakwater configurations
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | /* Recommended parameters (in XL170 environment)
 8 | * - 1 us average service time
 9 | * #define SBW_MIN_DELAY_US		45
10 | * #define SBW_DROP_THRESH		90
11 | *
12 | * - 10 us average service time
13 | * #define SBW_MIN_DELAY_US		80
14 | * #define SBW_DROP_THRESH		160
15 | *
16 | * - 100 us average service time
17 | * #define SBW_MIN_DELAY_US		500
18 | * #define SBW_DROP_THRESH		1000
19 | */
20 | 
21 | /* delay threshold to detect congestion */
22 | #define SBW_MIN_DELAY_US		80
23 | /* delay threshold for AQM */
24 | #define SBW_DROP_THRESH			160
25 | 
26 | /* round trip time in us */
27 | #define SBW_RTT_US			10
28 | 
29 | #define SBW_AI				0.001
30 | #define SBW_MD				0.02
31 | #define CBW_MAX_CLIENT_DELAY_US		10
32 | 


--------------------------------------------------------------------------------
/breakwater/src/bw_proto.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * bw_proto.h - RPC protocol definitions for BreakWater
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | 
 9 | #define BW_REQ_MAGIC	0x63727063 /* 'crpc' */
10 | #define BW_RESP_MAGIC	0x73727063 /* 'srpc' */
11 | 
12 | enum {
13 | 	BW_OP_CALL = 0,  /* performs a procedure call */
14 | 	BW_OP_WINUPDATE, /* just updates the window (no call) */
15 | 	BW_OP_MAX,	  /* maximum number of opcodes */
16 | };
17 | 
18 | #define BW_CFLAG_DSYNC	0x01
19 | 
20 | #define BW_SFLAG_DROP	0x01
21 | 
22 | /* header used for CLIENT -> SERVER */
23 | struct cbw_hdr {
24 | 	uint32_t	magic; /* must be set to BW_REQ_MAGIC */
25 | 	uint32_t	op;    /* the opcode */
26 | 	size_t		len;   /* length of request in bytes */
27 | 	uint64_t	id;    /* Request / Response ID */
28 | 	uint64_t	demand;/* the demanded window size */
29 | 	uint64_t	ts_sent;
30 | 	uint8_t		flags; /* presumably BW_CFLAG_* bits - TODO confirm */
31 | };
32 | 
33 | /* header used for SERVER -> CLIENT */
34 | struct sbw_hdr {
35 | 	uint32_t	magic; /* must be set to BW_RESP_MAGIC */
36 | 	uint32_t	op;    /* the opcode */
37 | 	size_t		len;   /* length of response in bytes */
38 | 	uint64_t	id;    /* Request / Response ID */
39 | 	uint64_t	win;   /* the offered window size */
40 | 	uint64_t	ts_sent;
41 | 	uint8_t		flags; /* presumably BW_SFLAG_* bits - TODO confirm */
42 | };
43 | 


--------------------------------------------------------------------------------
/breakwater/src/dg_config.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * dg_config.h - Dagor configurations
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | /* Recommended parameters with 1,000 clients
 8 | *  in XL170 environment
 9 | * - 1 us average service time
10 | * (bimod) #define DAGOR_OVERLOAD_THRESH		20
11 | * (exp) #define DAGOR_OVERLOAD_THRESH		30
12 | * (const) #define DAGOR_OVERLOAD_THRESH		30
13 | *
14 | * - 10 us average service time
15 | * (bimod) #define DAGOR_OVERLOAD_THRESH		70
16 | * (exp) #define DAGOR_OVERLOAD_THRESH		60
17 | * (const) #define DAGOR_OVERLOAD_THRESH		50
18 | *
19 | * - 100 us average service time
20 | * (bimod) #define DAGOR_OVERLOAD_THRESH		450
21 | * (exp) #define DAGOR_OVERLOAD_THRESH		400
22 | * (const) #define DAGOR_OVERLOAD_THRESH		350
23 | */
24 | 
25 | /* delay threshold to detect congestion */
26 | #define DAGOR_OVERLOAD_THRESH	60	// in us
27 | /* max priority update interval */
28 | #define DAGOR_PRIO_UPDATE_INT	1000	// in us
29 | /* max # requests for priority update */
30 | #define DAGOR_PRIO_UPDATE_REQS	2000	// in # reqs
31 | /* queueing delay monitor interval */
32 | #define DAGOR_PRIO_MONITOR	10
33 | /* decrement factor when congested */
34 | #define DAGOR_ALPHA		0.95
35 | /* increment factor when uncongested */
36 | #define DAGOR_BETA		0.01
37 | 
38 | #define CDG_BATCH_WAIT_US	0
39 | 


--------------------------------------------------------------------------------
/breakwater/src/dg_proto.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * dg_proto.h - RPC protocol definitions for DAGOR
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | 
 9 | #define DG_REQ_MAGIC	0x63727063 /* 'crpc' */
10 | #define DG_RESP_MAGIC	0x73727063 /* 'srpc' */
11 | #define DG_MAX_PRIO	128
12 | 
13 | enum {
14 | 	DG_OP_CALL = 0,  /* performs a procedure call */
15 | 	DG_OP_WINUPDATE, /* just updates the window (no call) */
16 | 	DG_OP_MAX,	  /* maximum number of opcodes */
17 | };
18 | 
19 | #define DG_SFLAG_DROP	0x01
20 | 
21 | /* header used for CLIENT -> SERVER */
22 | struct cdg_hdr {
23 | 	uint32_t	magic; /* must be set to DG_REQ_MAGIC */
24 | 	uint32_t	op;    /* the opcode */
25 | 	size_t		len;   /* length of request in bytes */
26 | 	uint64_t	id;    /* Request / Response ID */
27 | 	int		prio;  /* priority attached to the request */
28 | 	uint64_t	ts_sent;
29 | };
30 | 
31 | /* header used for SERVER -> CLIENT */
32 | struct sdg_hdr {
33 | 	uint32_t	magic; /* must be set to DG_RESP_MAGIC */
34 | 	uint32_t	op;    /* the opcode */
35 | 	size_t		len;   /* length of response in bytes */
36 | 	uint64_t	id;    /* Request / Response ID */
37 | 	int		prio;  /* priority value sent back by the server */
38 | 	uint64_t	ts_sent;
39 | 	uint8_t		flags; /* presumably DG_SFLAG_* bits - TODO confirm */
40 | };
41 | 


--------------------------------------------------------------------------------
/breakwater/src/nc_config.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * nc_config.h - NoControl configurations
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | /* turn on AQM? */
 8 | #define SNC_AQM_ON		false
 9 | /* AQM Threshold */
10 | #define SNC_AQM_THRESH		2000
11 | 


--------------------------------------------------------------------------------
/breakwater/src/nc_proto.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * nc_proto.h - RPC protocol definitions for NoControl
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | 
 9 | #define NC_REQ_MAGIC	0x63727063 /* 'crpc' */
10 | #define NC_RESP_MAGIC	0x73727063 /* 'srpc' */
11 | 
12 | enum {
13 | 	NC_OP_CALL = 0,  /* performs a procedure call */
14 | 	NC_OP_WINUPDATE, /* just updates the window (no call) */
15 | 	NC_OP_MAX,	  /* maximum number of opcodes */
16 | };
17 | 
18 | /* header used for CLIENT -> SERVER */
19 | struct cnc_hdr {
20 | 	uint32_t	magic; /* must be set to NC_REQ_MAGIC */
21 | 	uint32_t	op;    /* the opcode */
22 | 	size_t		len;   /* length of request in bytes */
23 | 	uint64_t	id;    /* Request / Response ID */
24 | 	uint64_t	ts;
25 | };
26 | 
27 | /* header used for SERVER -> CLIENT */
28 | struct snc_hdr {
29 | 	uint32_t	magic; /* must be set to NC_RESP_MAGIC */
30 | 	uint32_t	op;    /* the opcode */
31 | 	size_t		len;   /* length of response in bytes */
32 | 	uint64_t	id;    /* Request / Response ID */
33 | 	uint64_t	ts;
34 | };
35 | 


--------------------------------------------------------------------------------
/breakwater/src/sd_config.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * sd_config.h - SEDA configurations
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | /* Recommended parameters with 1,000 clients
 8 | *  in XL170 environment
 9 | * - 1 us average service time
10 | * #define CSD_MAX_CLIENT_DELAY_US	10
11 | * #define CSD_TB_INIT_RATE		4
12 | * #define CSD_TB_MIN_RATE		2
13 | * #define SEDA_TARGET			50
14 | * #define SEDA_ADJ_I			120
15 | * #define SEDA_ADJ_D			1.04
16 | *
17 | * - 10 us average service time
18 | * #define CSD_MAX_CLIENT_DELAY_US	100
19 | * #define CSD_TB_INIT_RATE		4
20 | * #define CSD_TB_MIN_RATE		2
21 | * #define SEDA_TARGET			80
22 | * #define SEDA_ADJ_I			40
23 | * #define SEDA_ADJ_D			1.04
24 | *
25 | * - 100 us average service time
26 | * #define CSD_MAX_CLIENT_DELAY_US	100
27 | * #define CSD_TB_INIT_RATE		4
28 | * #define CSD_TB_MIN_RATE		2
29 | * #define SEDA_TARGET			720
30 | * #define SEDA_ADJ_I			10
31 | * #define SEDA_ADJ_D			1.3
32 | */
33 | 
34 | /* maximum client delay */
35 | #define CSD_MAX_CLIENT_DELAY_US		100
36 | /* Token bucket initial rate (reqs/sec) */
37 | #define CSD_TB_INIT_RATE		4
38 | /* Token bucket minimum rate (reqs/sec) */
39 | #define CSD_TB_MIN_RATE			2
40 | /* Token bucket maximum number of token (burstiness) */
41 | #define CSD_TB_MAX_TOKEN		4
42 | /* EWMA filter constant */
43 | #define SEDA_ALPHA			0.7
44 | /* target 90th percentile delay */
45 | #define SEDA_TARGET			80
46 | /* time before controller run */
47 | #define SEDA_TIMEOUT			1000
48 | /* % error to trigger decrease */
49 | #define SEDA_ERR_D			0.0
50 | /* % error to trigger increase */
51 | #define SEDA_ERR_I			-0.5
52 | /* additive rate increase */
53 | #define SEDA_ADJ_I			4.0
54 | /* multiplicative rate decrease */
55 | #define SEDA_ADJ_D			1.1
56 | /* weight on additive increase */
57 | #define SEDA_CI				-0.1
58 | 


--------------------------------------------------------------------------------
/breakwater/src/sd_proto.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * sd_proto.h - RPC protocol definitions for SEDA
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | 
 9 | #define SD_REQ_MAGIC	0x63727063 /* 'crpc' */
10 | #define SD_RESP_MAGIC	0x73727063 /* 'srpc' */
11 | 
12 | enum {
13 | 	SD_OP_CALL = 0,  /* performs a procedure call */
14 | 	SD_OP_WINUPDATE, /* just updates the window (no call) */
15 | 	SD_OP_MAX,	  /* maximum number of opcodes */
16 | };
17 | 
18 | /* header used for CLIENT -> SERVER */
19 | struct csd_hdr {
20 | 	uint32_t	magic; /* must be set to SD_REQ_MAGIC */
21 | 	uint32_t	op;    /* the opcode */
22 | 	size_t		len;   /* length of request in bytes */
23 | 	uint64_t	id;    /* Request / Response ID */
24 | 	uint64_t	ts;
25 | };
26 | 
27 | /* header used for SERVER -> CLIENT */
28 | struct ssd_hdr {
29 | 	uint32_t	magic; /* must be set to SD_RESP_MAGIC */
30 | 	uint32_t	op;    /* the opcode */
31 | 	size_t		len;   /* length of response in bytes */
32 | 	uint64_t	id;    /* Request / Response ID */
33 | 	uint64_t	ts;
34 | };
35 | 


--------------------------------------------------------------------------------
/breakwater/src/util.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * util.c - utility functions for RPC
  3 |  */
  4 | 
  5 | #include "util.h"
  6 | 
  7 | /**
  8 |  * tcp_read_full - reads exactly the requested bytes or fails
  9 |  * @c: the TCP connection to read from
 10 |  * @buf: the buffer to store the read
 11 |  * @len: the exact length of the read
 12 |  *
 13 |  * Returns @len bytes or <= 0 if there was an error.
 14 |  */
 15 | ssize_t tcp_read_full(tcpconn_t *c, void *buf, size_t len)
 16 | {
 17 | 	char *pos = buf;
 18 | 	size_t n = 0;
 19 | 
 20 | 	while (n < len) {
 21 | 		ssize_t ret = tcp_read(c, pos + n, len - n);
 22 | 		if (ret <= 0)
 23 | 			return ret;
 24 | 		n += ret;
 25 | 	}
 26 | 
 27 | 	assert(n == len);
 28 | 	return n;
 29 | }
 30 | 
 31 | /**
 32 |  * tcp_write_full - writes exactly the requested bytes or fails
 33 |  * @c: the TCP connection to write to
 34 |  * @buf: the buffer to write to the socket
 35 |  * @len: the exact length of the write
 36 |  *
 37 |  * Returns @len bytes or < 0 if there was an error.
 38 |  */
 39 | ssize_t tcp_write_full(tcpconn_t *c, const void *buf, size_t len)
 40 | {
 41 | 	const char *pos = buf;
 42 | 	size_t n = 0;
 43 | 
 44 | 	while (n < len) {
 45 | 		ssize_t ret = tcp_write(c, pos + n, len - n);
 46 | 		if (ret < 0)
 47 | 			return ret;
 48 | 		assert(ret > 0);
 49 | 		n += ret;
 50 | 	}
 51 | 
 52 | 	assert(n == len);
 53 | 	return n;
 54 | }
 55 | 
 56 | static bool pull_iov(struct iovec **iovp, int *iovcntp, size_t n)
 57 | {
 58 | 	struct iovec *iov = *iovp;
 59 | 	int iovcnt = *iovcntp, i;
 60 | 	/* drop @n bytes from the front of the vector; true if bytes remain */
 61 | 	for (i = 0; i < iovcnt; i++) {
 62 | 		if (n < iov[i].iov_len) {
 63 | 			iov[i].iov_base = (char *)iov[i].iov_base + n;
 64 | 			iov[i].iov_len -= n;
 65 | 			*iovp = &iov[i];	/* resume from the partially consumed entry */
 66 | 			*iovcntp -= i;
 67 | 			return true;
 68 | 		}
 69 | 		n -= iov[i].iov_len;	/* entry fully consumed; move to the next */
 70 | 	}
 71 | 	/* every entry was consumed, so @n must have matched the total exactly */
 72 | 	assert (n == 0);
 73 | 	return false;
 74 | }
 75 | 
 76 | /**
 77 |  * tcp_writev_full - writes exactly the requested vector of bytes or fails
 78 |  * @c: the TCP connection to write to
 79 |  * @iov: the scatter-gather array of buffers to write
 80 |  * @iovcnt: the number of entries in @iov
 81 |  *
 82 |  * WARNING: @iov could be modified by this function, and its state is undefined
 83 |  * after calling it.
 84 |  *
 85 |  * Returns the number of written bytes or < 0 if there was an error.
 86 |  */
 87 | ssize_t tcp_writev_full(tcpconn_t *c, struct iovec *iov, int iovcnt)
 88 | {
 89 | 	ssize_t n, len = 0;
 90 | 
 91 | 	do {
 92 | 		n = tcp_writev(c, iov, iovcnt);
 93 | 		if (n < 0)
 94 | 			return n;
 95 | 		assert(n > 0);
 96 | 		len += n;
 97 | 	} while (pull_iov(&iov, &iovcnt, n));
 98 | 
 99 | 	return len;
100 | }
101 | 


--------------------------------------------------------------------------------
/breakwater/src/util.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * util.h - utility functions for RPC
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <runtime/tcp.h>
 9 | 
10 | extern ssize_t tcp_read_full(tcpconn_t *c, void *buf, size_t len);
11 | extern ssize_t tcp_write_full(tcpconn_t *c, const void *buf, size_t len);
12 | extern ssize_t tcp_writev_full(tcpconn_t *c, struct iovec *iov, int iovcnt);
13 | 


--------------------------------------------------------------------------------
/build/config:
--------------------------------------------------------------------------------
 1 | # build configuration options (set to y for "yes", n for "no")
 2 | 
 3 | # Enable Mellanox ConnectX-4,5 NIC Support
 4 | CONFIG_MLX5=n
 5 | # Enable SPDK NVMe support
 6 | CONFIG_SPDK=n
 7 | # Enable debug build mode (slower but enables several runtime checks)
 8 | CONFIG_DEBUG=n
 9 | # Enable additional compiler optimizations (may reduce compatibility)
10 | CONFIG_OPTIMIZE=n
11 | # Allow runtimes to access Mellanox ConnectX-5 NICs directly (kernel bypass)
12 | CONFIG_DIRECTPATH=n
13 | # Build with clang instead of gcc
14 | CONFIG_CLANG=n
15 | # Build with split TX pool
16 | CONFIG_SPLIT_TX=n


--------------------------------------------------------------------------------
/build/patches/dpdk/0001-config-extend-max-memseg-lists.patch:
--------------------------------------------------------------------------------
 1 | From 9859b2fe35c2f039659075bcc11d03f691107588 Mon Sep 17 00:00:00 2001
 2 | From: Josh Fried <joshuafried@gmail.com>
 3 | Date: Mon, 30 Dec 2024 17:27:30 -0600
 4 | Subject: [PATCH 1/3] config: extend max memseg lists
 5 | 
 6 | ---
 7 |  config/rte_config.h | 2 +-
 8 |  1 file changed, 1 insertion(+), 1 deletion(-)
 9 | 
10 | diff --git a/config/rte_config.h b/config/rte_config.h
11 | index da265d7dd2..88ad16cc37 100644
12 | --- a/config/rte_config.h
13 | +++ b/config/rte_config.h
14 | @@ -30,7 +30,7 @@
15 |  /* EAL defines */
16 |  #define RTE_CACHE_GUARD_LINES 1
17 |  #define RTE_MAX_HEAPS 32
18 | -#define RTE_MAX_MEMSEG_LISTS 128
19 | +#define RTE_MAX_MEMSEG_LISTS 1024
20 |  #define RTE_MAX_MEMSEG_PER_LIST 8192
21 |  #define RTE_MAX_MEM_MB_PER_LIST 32768
22 |  #define RTE_MAX_MEMSEG_PER_TYPE 32768
23 | -- 
24 | 2.43.0
25 | 
26 | 


--------------------------------------------------------------------------------
/build/patches/dpdk/0002-i40e-disable-itr.patch:
--------------------------------------------------------------------------------
 1 | From df9791417a07e6eb333be938f89bb7f9a20e865b Mon Sep 17 00:00:00 2001
 2 | From: Josh Fried <joshuafried@gmail.com>
 3 | Date: Mon, 30 Dec 2024 17:30:52 -0600
 4 | Subject: [PATCH 2/3] i40e: disable itr
 5 | 
 6 | ---
 7 |  drivers/net/i40e/i40e_ethdev.h | 4 ++--
 8 |  1 file changed, 2 insertions(+), 2 deletions(-)
 9 | 
10 | diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
11 | index 1bbe7ad376..3dac4b4be9 100644
12 | --- a/drivers/net/i40e/i40e_ethdev.h
13 | +++ b/drivers/net/i40e/i40e_ethdev.h
14 | @@ -193,9 +193,9 @@ enum i40e_flxpld_layer_idx {
15 |  /* Default queue interrupt throttling time in microseconds */
16 |  #define I40E_ITR_INDEX_DEFAULT          0
17 |  #define I40E_ITR_INDEX_NONE             3
18 | -#define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
19 | +#define I40E_QUEUE_ITR_INTERVAL_DEFAULT 0 /* 32 us */
20 |  #define I40E_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */
21 | -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
22 | +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 0 /* 32 us */
23 |  /* Special FW support this floating VEB feature */
24 |  #define FLOATING_VEB_SUPPORTED_FW_MAJ 5
25 |  #define FLOATING_VEB_SUPPORTED_FW_MIN 0
26 | -- 
27 | 2.43.0
28 | 
29 | 


--------------------------------------------------------------------------------
/build/patches/rdma-core/0004-expose-object-id.patch:
--------------------------------------------------------------------------------
 1 | From f793f54d9865e63c04885f9ef816239ac58ee3ba Mon Sep 17 00:00:00 2001
 2 | From: Josh Fried <joshuafried@gmail.com>
 3 | Date: Thu, 27 Apr 2023 11:52:00 -0400
 4 | Subject: [PATCH 4/4] expose object id
 5 | 
 6 | ---
 7 |  providers/mlx5/libmlx5.map | 1 +
 8 |  providers/mlx5/mlx5.c      | 5 +++++
 9 |  providers/mlx5/mlx5dv.h    | 1 +
10 |  3 files changed, 7 insertions(+)
11 | 
12 | diff --git a/providers/mlx5/libmlx5.map b/providers/mlx5/libmlx5.map
13 | index 375d2b38..a4f5fc07 100644
14 | --- a/providers/mlx5/libmlx5.map
15 | +++ b/providers/mlx5/libmlx5.map
16 | @@ -239,4 +239,5 @@ MLX5_1.24 {
17 |  		mlx5_vfio_get_clock;
18 |  		mlx5_access_reg;
19 |  		mlx5dv_dr_table_get_id;
20 | +		mlx5_devx_get_obj_id;
21 |  } MLX5_1.23;
22 | diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c
23 | index 827712e0..242e553c 100644
24 | --- a/providers/mlx5/mlx5.c
25 | +++ b/providers/mlx5/mlx5.c
26 | @@ -247,6 +247,11 @@ int mlx5_cmd_status_to_err(uint8_t status)
27 |  	}
28 |  }
29 |  
30 | +uint32_t mlx5_devx_get_obj_id(struct mlx5dv_devx_obj *obj)
31 | +{
32 | +	return obj->object_id;
33 | +}
34 | +
35 |  int mlx5_get_cmd_status_err(int err, void *out)
36 |  {
37 |  	if (err == EREMOTEIO)
38 | diff --git a/providers/mlx5/mlx5dv.h b/providers/mlx5/mlx5dv.h
39 | index 7371b1a2..111493e6 100644
40 | --- a/providers/mlx5/mlx5dv.h
41 | +++ b/providers/mlx5/mlx5dv.h
42 | @@ -2219,6 +2219,7 @@ struct mlx5dv_devx_msi_vector {
43 |  };
44 |  
45 |  extern uint32_t mlx5dv_dr_table_get_id(struct mlx5dv_dr_table *tbl);
46 | +extern uint32_t mlx5_devx_get_obj_id(struct mlx5dv_devx_obj *obj);
47 |  
48 |  struct mlx5dv_devx_msi_vector *
49 |  mlx5dv_devx_alloc_msi_vector(struct ibv_context *ibctx);
50 | -- 
51 | 2.34.1
52 | 
53 | 


--------------------------------------------------------------------------------
/build/patches/rdma-core/0005-increase-max-number-of-qps-cqs.patch:
--------------------------------------------------------------------------------
 1 | From 150491685a48592a331bb919da78264221924290 Mon Sep 17 00:00:00 2001
 2 | From: Josh Fried <joshuafried@gmail.com>
 3 | Date: Wed, 9 Aug 2023 21:45:10 +0000
 4 | Subject: [PATCH 5/5] increase max number of qps/cqs
 5 | 
 6 | ---
 7 |  providers/mlx5/mlx5_vfio.c | 2 ++
 8 |  1 file changed, 2 insertions(+)
 9 | 
10 | diff --git a/providers/mlx5/mlx5_vfio.c b/providers/mlx5/mlx5_vfio.c
11 | index 614bd604..b0a94965 100644
12 | --- a/providers/mlx5/mlx5_vfio.c
13 | +++ b/providers/mlx5/mlx5_vfio.c
14 | @@ -1939,6 +1939,8 @@ static int handle_hca_cap(struct mlx5_vfio_context *ctx, void *set_ctx, int set_
15 |  		DEVX_SET(cmd_hca_cap, set_hca_cap, disable_link_up_by_init_hca, 1);
16 |  
17 |  	DEVX_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, sys_page_shift - 12);
18 | +	DEVX_SET(cmd_hca_cap, set_hca_cap, log_max_qp, MLX5_VFIO_CAP_GEN_MAX(ctx, log_max_qp));
19 | +	DEVX_SET(cmd_hca_cap, set_hca_cap, log_max_cq, MLX5_VFIO_CAP_GEN_MAX(ctx, log_max_cq));
20 |  
21 |  	if (MLX5_VFIO_CAP_GEN_MAX(ctx, mkey_by_name))
22 |  		DEVX_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1);
23 | -- 
24 | 2.39.2
25 | 
26 | 


--------------------------------------------------------------------------------
/client.config:
--------------------------------------------------------------------------------
1 | # an example runtime config file
2 | host_addr 192.168.1.7
3 | host_netmask 255.255.255.0
4 | host_gateway 192.168.1.1
5 | runtime_kthreads 6
6 | runtime_spinning_kthreads 6
7 | runtime_guaranteed_kthreads 6
8 | runtime_priority lc
9 | 


--------------------------------------------------------------------------------
/inc/asm/atomic.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * atomic.h - utilities for atomic memory ops
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/compiler.h>
 8 | #include <base/assert.h>
 9 | 
10 | /**
11 |  * mb - a memory barrier
12 |  *
13 |  * Ensures all loads and stores before the barrier complete
14 |  * before all loads and stores after the barrier.
15 |  */
16 | #define mb() asm volatile("mfence" ::: "memory")
17 | 
18 | /**
19 |  * rmb - a read memory barrier
20 |  *
21 |  * Ensures all loads before the barrier complete before
22 |  * all loads after the barrier.
23 |  */
24 | #define rmb() barrier()
25 | 
26 | /**
27 |  * wmb - a write memory barrier
28 |  *
29 |  * Ensures all stores before the barrier complete before
30 |  * all stores after the barrier.
31 |  */
32 | #define wmb() barrier()
33 | 
34 | /**
35 |  * store_release - store a native value with release fence semantics
36 |  * @p: the pointer to store to
37 |  * @v: the value to store
38 |  */
39 | #define store_release(p, v)			\
40 | do {						\
41 | 	BUILD_ASSERT(type_is_native(*p));	\
42 | 	barrier();				\
43 | 	ACCESS_ONCE(*p) = v;			\
44 | } while (0)
45 | 
46 | /**
47 |  * load_acquire - load a native value with acquire fence semantics
48 |  * @p: the pointer to load from
49 |  */
50 | #define load_acquire(p)				\
51 | ({						\
52 | 	BUILD_ASSERT(type_is_native(*p));	\
53 | 	typeof(*p) __p = ACCESS_ONCE(*p);	\
54 | 	barrier();				\
55 | 	__p;					\
56 | })
57 | 
58 | /**
59 |  * load_consume - load a native value with consume fence semantics
60 |  * @p: the pointer to load from
61 |  */
62 | #define load_consume(p)				\
63 | ({						\
64 | 	BUILD_ASSERT(type_is_native(*p));	\
65 | 	typeof(*p) __p = ACCESS_ONCE(*p);	\
66 | 	barrier();				\
67 | 	__p;					\
68 | })
69 | 


--------------------------------------------------------------------------------
/inc/asm/chksum.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * chksum.h - utilities for calculating checksums
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <stdint.h>
 8 | 
 9 | /**
10 |  * chksum_internet - performs an internet checksum on a buffer
11 |  * @buf: the buffer to checksum
12 |  * @len: the length of @buf in bytes
13 |  *
14 |  * An internet checksum is a 16-bit one's complement sum. Details
15 |  * are described in RFC 1071.
16 |  *
17 |  * Returns the complemented 16-bit checksum value.
18 |  */
19 | static inline uint16_t chksum_internet(const void *buf, int len)
20 | {
21 |         uint64_t sum;
22 | 
23 |         asm volatile("xorq %0, %0\n"
24 | 
25 |              /* sum 8-byte chunks; %edx counts them (len >> 3) */
26 |              "movl %2, %%edx\n"
27 |              "shrl $3, %%edx\n"
28 |              "cmp $0, %%edx\n"
29 |              "jz 2f\n"
30 |              "1: adcq (%1), %0\n"
31 |              "leaq 8(%1), %1\n"
32 |              "decl %%edx\n"
33 |              "jne 1b\n"
34 |              "adcq $0, %0\n"
35 | 
36 |              /* add a 4-byte word if bit 2 of len is set */
37 |              "2: test $4, %2\n"
38 |              "je 3f\n"
39 |              "movl (%1), %%edx\n"
40 |              "addq %%rdx, %0\n"
41 |              "adcq $0, %0\n"
42 |              "leaq 4(%1), %1\n"
43 | 
44 |              /* add a 2-byte word if bit 1 of len is set */
45 |              "3: test $2, %2\n"
46 |              "je 4f\n"
47 |              "movzwq (%1), %%rdx\n"
48 |              "addq %%rdx, %0\n"
49 |              "adcq $0, %0\n"
50 |              "leaq 2(%1), %1\n"
51 | 
52 |              /* add the final byte if bit 0 of len is set */
53 |              "4: test $1, %2\n"
54 |              "je 5f\n"
55 |              "movzbq (%1), %%rdx\n"
56 |              "addq %%rdx, %0\n"
57 |              "adcq $0, %0\n"
58 | 
59 |              /* fold 64 -> 32 -> 16 bits, then complement */
60 |              "5: movq %0, %1\n"
61 |              "shrq $32, %0\n"
62 |              "addl %k1, %k0\n"
63 |              "adcl $0, %k0\n"
64 |              "movq %0, %1\n"
65 |              "shrl $16, %k0\n"
66 |              "addw %w1, %w0\n"
67 |              "adcw $0, %w0\n"
68 | 	     "not %0\n"
69 | 
70 |             : "=&r"(sum), "=r"(buf)
71 |             : "r"(len), "1"(buf) : "%rdx", "cc", "memory");
72 | 
73 |         return (uint16_t)sum;
74 | }
75 | 
76 | 


--------------------------------------------------------------------------------
/inc/asm/cpu.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * cpu.h - basic definitions for x86_64 CPUs
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | /*
 8 |  * Endianness
 9 |  */
10 | 
11 | #define __LITTLE_ENDIAN	1234
12 | #define __BIG_ENDIAN	4321
13 | 
14 | #define __BYTE_ORDER	__LITTLE_ENDIAN
15 | 
16 | 
17 | /*
18 |  * Word Size
19 |  */
20 | 
21 | #define __32BIT_WORDS	32
22 | #define __64BIT_WORDS	64
23 | 
24 | #define __WORD_SIZE	__64BIT_WORDS
25 | 
26 | #define CACHE_LINE_SIZE	64
27 | 


--------------------------------------------------------------------------------
/inc/asm/ops.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ops.h - useful x86_64 instructions
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <features.h>
 8 | #include <base/types.h>
 9 | 
10 | /* cpu_relax - issues a PAUSE, for use inside busy-wait loops */
11 | static inline void cpu_relax(void)
12 | {
13 | #if __GNUC_PREREQ(10, 0)
14 | #  if __has_builtin(__builtin_ia32_pause)
15 | 	__builtin_ia32_pause();
16 | 	return;
17 | #  endif
18 | #endif
19 | 	/* older compiler or missing builtin: emit the instruction directly */
20 | 	__asm__ volatile("pause");
21 | }
20 | 
21 | static inline void cpu_serialize(void)
22 | {
23 |         asm volatile("xorl %%eax, %%eax\n\t"
24 | 		     "cpuid" : : : "%rax", "%rbx", "%rcx", "%rdx");
25 | }
26 | 
27 | struct cpuid_info {
28 | 	unsigned int eax, ebx, ecx, edx;
29 | };
30 | 
31 | static inline void cpuid(int leaf, int subleaf, struct cpuid_info *regs)
32 | {
33 | 	asm volatile("cpuid" : "=a" (regs->eax), "=b" (regs->ebx),
34 | 		     "=c" (regs->ecx), "=d" (regs->edx) : "a" (leaf),
35 | 		     "c"(subleaf));
36 | }
37 | 
38 | /* rdtsc - reads the timestamp counter and returns its 64-bit value */
39 | static inline uint64_t rdtsc(void)
40 | {
41 | 	uint64_t lo, hi;
42 | 
43 | #if __GNUC_PREREQ(10, 0)
44 | #  if __has_builtin(__builtin_ia32_rdtsc)
45 | 	return __builtin_ia32_rdtsc();
46 | #  endif
47 | #endif
48 | 	/* older compiler or missing builtin: every path must return a value */
49 | 	__asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
50 | 	return lo | (hi << 32);
51 | }
50 | 
51 | /*
52 |  * rdtscp - reads the timestamp counter together with its auxiliary value
53 |  * @auxp: if non-NULL, receives the 32-bit auxiliary value
54 |  *
55 |  * Returns the 64-bit counter value.
56 |  */
57 | static inline uint64_t rdtscp(uint32_t *auxp)
58 | {
59 | 	uint64_t lo, hi;
60 | 	uint32_t aux;
61 | 
62 | #if __GNUC_PREREQ(10, 0)
63 | #  if __has_builtin(__builtin_ia32_rdtscp)
64 | 	lo = __builtin_ia32_rdtscp(&aux);
65 | 	if (auxp)
66 | 		*auxp = aux;
67 | 	return lo;
68 | #  endif
69 | #endif
70 | 	/* older compiler or missing builtin: never read unset locals */
71 | 	__asm__ volatile("rdtscp" : "=a" (lo), "=d" (hi), "=c" (aux));
72 | 	if (auxp)
73 | 		*auxp = aux;
74 | 	return lo | (hi << 32);
75 | }
70 | 
71 | static inline uint64_t __mm_crc32_u64(uint64_t crc, uint64_t val)
72 | {
73 | 	asm("crc32q %1, %0" : "+r" (crc) : "rm" (val));
74 | 	return crc;
75 | }
76 | 


--------------------------------------------------------------------------------
/inc/base/compiler.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * compiler.h - useful compiler hints, intrinsics, and attributes
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #ifndef likely
 8 | #define likely(x) __builtin_expect(!!(x), 1)
 9 | #endif
10 | #ifndef unlikely
11 | #define unlikely(x) __builtin_expect(!!(x), 0)
12 | #endif
13 | #ifndef __cplusplus
14 | #define unreachable() __builtin_unreachable()
15 | #endif
16 | 
17 | #define prefetch0(x) __builtin_prefetch((x), 0, 3)
18 | #define prefetch1(x) __builtin_prefetch((x), 0, 2)
19 | #define prefetch2(x) __builtin_prefetch((x), 0, 1)
20 | #define prefetchnta(x) __builtin_prefetch((x), 0, 0)
21 | #define prefetch(x) prefetch0(x)
22 | 
23 | /* variable attributes */
24 | #ifndef __packed
25 | #define __packed __attribute__((packed))
26 | #endif
27 | #define __notused __attribute__((unused))
28 | #ifndef __aligned
29 | #define __aligned(x) __attribute__((aligned(x)))
30 | #endif
31 | 
32 | /* function attributes */
33 | #define __noinline __attribute__((noinline))
34 | #define __noreturn __attribute__((noreturn))
35 | #define __must_use_return __attribute__((warn_unused_result))
36 | #define __pure __attribute__((pure))
37 | #define __weak __attribute__((weak))
38 | #define __malloc __attribute__((malloc))
39 | #define __assume_aligned(x) __attribute__((assume_aligned(x)))
40 | #define __nofp __attribute__((target("general-regs-only")))
41 | 
42 | #define GCC_VERSION (__GNUC__ * 10000        \
43 | 		     + __GNUC_MINOR__ * 100  \
44 | 		     + __GNUC_PATCHLEVEL__)
45 | 
46 | #if GCC_VERSION >= 40800
47 | #define HAS_BUILTIN_BSWAP 1
48 | #endif
49 | 
50 | #define barrier() asm volatile("" ::: "memory")
51 | 
52 | #define	ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
53 | 
54 | #define type_is_native(t) \
55 | 	(sizeof(t) == sizeof(char)  || \
56 | 	 sizeof(t) == sizeof(short) || \
57 | 	 sizeof(t) == sizeof(int)   || \
58 | 	 sizeof(t) == sizeof(long))
59 | 
60 | /*
61 |  * These attributes are defined only with the sparse checker tool.
62 |  */
63 | #ifdef __CHECKER__
64 | #define __rcu		__attribute__((noderef, address_space(1)))
65 | #define __perthread	__attribute__((noderef, address_space(2)))
66 | #define __force		__attribute__((force))
67 | #undef __assume_aligned
68 | #define __assume_aligned(x)
69 | #else /* __CHECKER__ */
70 | #define __rcu
71 | #define __perthread
72 | #define __force
73 | #endif /* __CHECKER__ */
74 | 


--------------------------------------------------------------------------------
/inc/base/cpu.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * cpu.h - detection for CPU topology
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/limits.h>
 9 | #include <base/bitmap.h>
10 | 
11 | extern int cpu_count; /* the number of available CPUs */
12 | extern int numa_count; /* the number of NUMA nodes */
13 | extern int numa_count_with_mem; /* the number of NUMA nodes with local memory */
14 | 
15 | struct cpu_info {
16 | 	DEFINE_BITMAP(thread_siblings_mask, NCPU);
17 | 	DEFINE_BITMAP(core_siblings_mask, NCPU);
18 | 	int package;
19 | };
20 | 
21 | extern struct cpu_info cpu_info_tbl[NCPU];
22 | 


--------------------------------------------------------------------------------
/inc/base/fd_transfer.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * fd_transfer.h - utility functions for sending FDs across UNIX sockets.
3 |  */
4 | 
5 | #pragma once
6 | 
7 | extern int recv_fd(int controlfd, int *shared_fd_out);
8 | extern int send_fd(int controlfd, int shared_fd);
9 | 


--------------------------------------------------------------------------------
/inc/base/gen.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * gen.h - shared generation numbers
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <stdint.h>
 8 | 
 9 | #include <base/stddef.h>
10 | 
11 | /* describes a generation number */
12 | struct gen_num {
13 | 	uint32_t		prev_gen; /* last value recorded by this side */
14 | 	volatile uint32_t	*gen;	  /* shared word; 0 = between generations */
15 | };
16 | 
17 | /*
18 |  * gen_active - used by a writer to indicate that a generation is ongoing
19 |  *
20 |  * Does nothing if a generation is already active. Otherwise publishes the
21 |  * successor of the previous generation. Zero is reserved for "between
22 |  * generations", so it is skipped when the 32-bit counter wraps; otherwise
23 |  * the writer could never signal an active generation again.
24 |  */
25 | static inline void gen_active(struct gen_num *g)
26 | {
27 | 	uint32_t next;
28 | 
29 | 	if (*g->gen != 0)
30 | 		return;
31 | 
32 | 	next = g->prev_gen + 1;
33 | 	if (next == 0)
34 | 		next = 1;
35 | 	*g->gen = next;
36 | }
37 | 
38 | /*
39 |  * gen_inactive - used by a writer to indicate that we are between generations
40 |  *
41 |  * Remembers the generation that just ended and resets the shared word to 0.
42 |  */
43 | static inline void gen_inactive(struct gen_num *g)
44 | {
45 | 	if (*g->gen != 0) {
46 | 		g->prev_gen = *g->gen;
47 | 		*g->gen = 0;
48 | 	}
49 | }
50 | 
51 | /*
52 |  * gen_in_same_gen - used by a reader to determine if we are in the same
53 |  * generation as last time we checked
54 |  *
55 |  * Always records the value just read, so the next call compares against it.
56 |  *
57 |  * Returns true if we are in the same generation as last time, false if we are
58 |  * in a different generation or are between generations.
59 |  */
60 | static inline bool gen_in_same_gen(struct gen_num *g)
61 | {
62 | 	uint32_t current_gen = *g->gen;
63 | 	bool unchanged;
64 | 
65 | 	unchanged = (current_gen != 0) && (current_gen == g->prev_gen);
66 | 	g->prev_gen = current_gen;
67 | 
68 | 	return unchanged;
69 | }
70 | 
71 | /*
72 |  * gen_init - initialize a shared generation number
73 |  * @g: the local descriptor to set up
74 |  * @gen: the shared generation word that @g will read and write
75 |  */
76 | static inline void gen_init(struct gen_num *g, uint32_t *gen)
77 | {
78 | 	g->prev_gen = 0;
79 | 	g->gen = gen;
80 | }
63 | 


--------------------------------------------------------------------------------
/inc/base/init.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * init.h - support for initialization
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/thread.h>
 9 | 
10 | extern int base_init(void);
11 | extern int base_init_thread(void);
12 | extern void init_shutdown(int status) __noreturn;
13 | 
14 | extern bool base_init_done;
15 | DECLARE_PERTHREAD(bool, thread_init_done);
16 | 


--------------------------------------------------------------------------------
/inc/base/kref.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * kref.h - generic support for reference counts
 3 |  *
 4 |  * This implementation is inspired by the following paper:
 5 |  * Kroah-Hartman, Greg, kobjects and krefs. Linux Symposium 2004
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | #include <base/stddef.h>
11 | #include <base/atomic.h>
12 | 
13 | struct kref {
14 | 	atomic_t cnt;
15 | };
16 | 
17 | /**
18 |  * kref_init - initializes the reference count to one
19 |  * @ref: the kref
20 |  */
21 | static inline void
22 | kref_init(struct kref *ref)
23 | {
24 | 	atomic_write(&ref->cnt, 1);
25 | }
26 | 
27 | /**
28 |  * kref_initn - initializes the reference count to @n
29 |  * @ref: the kref
30 |  * @n: the initial reference count
31 |  */
32 | static inline void
33 | kref_initn(struct kref *ref, int n)
34 | {
35 | 	atomic_write(&ref->cnt, n);
36 | }
37 | 
38 | /**
39 |  * kref_get - atomically increments the reference count
40 |  * @ref: the kref
41 |  */
42 | static inline void
43 | kref_get(struct kref *ref)
44 | {
45 | 	assert(atomic_read(&ref->cnt) > 0);
46 | 	atomic_inc(&ref->cnt);
47 | }
48 | 
49 | /**
50 |  * kref_put - atomically drops one reference, releasing the object at zero
51 |  * @ref: the kref
52 |  * @release: the function called with @ref once the count reaches zero
53 |  */
54 | static inline void
55 | kref_put(struct kref *ref, void (*release)(struct kref *ref))
56 | {
57 | 	assert(release);
58 | 	if (!atomic_dec_and_test(&ref->cnt))
59 | 		return;
60 | 	release(ref);
61 | }
62 | 
63 | /**
64 |  * kref_released - has this kref been released?
65 |  * @ref: the kref
66 |  *
67 |  * WARNING: this is unsafe without additional synchronization. For example, use
68 |  * this function while holding a lock that prevents the release() function from
69 |  * removing the object from the data structure you are accessing.
70 |  *
71 |  * Returns true if the reference count has dropped to zero.
72 |  */
73 | static inline bool
74 | kref_released(struct kref *ref)
75 | {
76 | 	return atomic_read(&ref->cnt) == 0;
77 | }
78 | 


--------------------------------------------------------------------------------
/inc/base/limits.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * limits.h - maximum limits for different resources
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #define NCPU		256	/* max number of cpus */
 8 | #define NTHREAD		512	/* max number of threads */
 9 | #define NNUMA		4	/* max number of numa zones */
10 | #define NSTAT		1024	/* max number of stat counters */
11 | 


--------------------------------------------------------------------------------
/inc/base/lock.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * lock.h - locking primitives
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <asm/ops.h>
 9 | 
10 | #define SPINLOCK_INITIALIZER {.locked = 0}
11 | #define DEFINE_SPINLOCK(name) spinlock_t name = SPINLOCK_INITIALIZER
12 | #define DECLARE_SPINLOCK(name) extern spinlock_t name
13 | 
14 | /**
15 |  * spin_lock_init - prepares a spin lock for use
16 |  * @l: the spin lock
17 |  */
18 | static inline void spin_lock_init(spinlock_t *l)
19 | {
20 | 	l->locked = 0;
21 | }
22 | 
23 | /**
24 |  * spin_lock_held - determines if the lock is held
25 |  * @l: the spin lock
26 |  *
27 |  * Returns true if the lock is held.
28 |  */
29 | static inline bool spin_lock_held(const spinlock_t *l)
30 | {
31 | 	return l->locked != 0;
32 | }
33 | 
34 | /**
35 |  * assert_spin_lock_held - asserts that the lock is currently held
36 |  * @l: the spin lock
37 |  */
38 | static inline void assert_spin_lock_held(spinlock_t *l)
39 | {
40 | 	assert(spin_lock_held(l));
41 | }
42 | 
43 | /**
44 |  * spin_lock - takes a spin lock, busy-waiting until it is available
45 |  * @l: the spin lock
46 |  */
47 | static inline void spin_lock(spinlock_t *l)
48 | {
49 | 	for (;;) {
50 | 		/* a previous value of zero means the lock is now ours */
51 | 		if (!__sync_lock_test_and_set(&l->locked, 1))
52 | 			return;
53 | 		/* poll with plain reads until the lock looks free again */
54 | 		while (l->locked)
55 | 			cpu_relax();
56 | 	}
57 | }
54 | 
55 | /**
56 |  * spin_try_lock - takes a spin lock, but only if it is available
57 |  * @l: the spin lock
58 |  *
59 |  * Returns true if the lock was acquired, otherwise false.
60 |  */
61 | static inline bool spin_try_lock(spinlock_t *l)
62 | {
63 | 	/* test-and-set yields the previous value, which is zero only if the
64 | 	 * lock was free */
65 | 	return __sync_lock_test_and_set(&l->locked, 1) == 0;
66 | }
67 | 
68 | /**
69 |  * spin_unlock - releases a spin lock
70 |  * @l: the spin lock
71 |  */
72 | static inline void spin_unlock(spinlock_t *l)
73 | {
74 | 	assert_spin_lock_held(l);
75 | 	__sync_lock_release(&l->locked);
76 | }
77 | 


--------------------------------------------------------------------------------
/inc/base/mem.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * mem.h - memory management
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | 
 9 | enum {
10 | 	PGSHIFT_4KB = 12,
11 | 	PGSHIFT_2MB = 21,
12 | 	PGSHIFT_1GB = 30,
13 | };
14 | 
15 | enum {
16 | 	PGSIZE_4KB = (1 << PGSHIFT_4KB), /* 4096 bytes */
17 | 	PGSIZE_2MB = (1 << PGSHIFT_2MB), /* 2097152 bytes */
18 | 	PGSIZE_1GB = (1 << PGSHIFT_1GB), /* 1073741824 bytes */
19 | };
20 | 
21 | extern bool cfg_transparent_hugepages_enabled;
22 | 
23 | #define PGMASK_4KB	(PGSIZE_4KB - 1)
24 | #define PGMASK_2MB	(PGSIZE_2MB - 1)
25 | #define PGMASK_1GB	(PGSIZE_1GB - 1)
26 | 
27 | /* page numbers */
28 | #define PGN_4KB(la)	(((uintptr_t)(la)) >> PGSHIFT_4KB)
29 | #define PGN_2MB(la)	(((uintptr_t)(la)) >> PGSHIFT_2MB)
30 | #define PGN_1GB(la)	(((uintptr_t)(la)) >> PGSHIFT_1GB)
31 | 
32 | #define PGOFF_4KB(la)	(((uintptr_t)(la)) & PGMASK_4KB)
33 | #define PGOFF_2MB(la)	(((uintptr_t)(la)) & PGMASK_2MB)
34 | #define PGOFF_1GB(la)	(((uintptr_t)(la)) & PGMASK_1GB)
35 | 
36 | #define PGADDR_4KB(la)	(((uintptr_t)(la)) & ~((uintptr_t)PGMASK_4KB))
37 | #define PGADDR_2MB(la)	(((uintptr_t)(la)) & ~((uintptr_t)PGMASK_2MB))
38 | #define PGADDR_1GB(la)	(((uintptr_t)(la)) & ~((uintptr_t)PGMASK_1GB))
39 | 
40 | typedef unsigned long physaddr_t; /* physical addresses */
41 | typedef unsigned long virtaddr_t; /* virtual addresses */
42 | 
43 | #ifndef MAP_FAILED
44 | #define MAP_FAILED	((void *)-1)
45 | #endif
46 | 
47 | typedef unsigned int mem_key_t;
48 | 
49 | extern void *mem_map_anom(void *base, size_t len, size_t pgsize, int node);
50 | extern void *mem_map_file(void *base, size_t len, int fd, off_t offset);
51 | extern void *mem_map_shm(mem_key_t key, void *base, size_t len,
52 | 			 size_t pgsize, bool exclusive);
53 | extern void *mem_map_shm_rdonly(mem_key_t key, void *base, size_t len,
54 | 			 size_t pgsize);
55 | extern int mem_unmap_shm(void *base);
56 | extern int mem_lookup_page_phys_addrs(void *addr, size_t len, size_t pgsize,
57 | 				      physaddr_t *maddrs);
58 | extern void touch_mapping(void *base, size_t len, size_t pgsize);
59 | 
60 | static inline int
61 | mem_lookup_page_phys_addr(void *addr, size_t pgsize, physaddr_t *paddr)
62 | {
63 | 	return mem_lookup_page_phys_addrs(addr, pgsize, pgsize, paddr);
64 | }
65 | 


--------------------------------------------------------------------------------
/inc/base/mempool.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * mempool.h - a simple, preallocated pool of memory
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/tcache.h>
 9 | 
10 | struct mempool {
11 | 	void			**free_items;
12 | 	size_t			allocated;
13 | 	size_t			capacity;
14 | 	void			*buf;
15 | 	size_t			len;
16 | 	size_t			pgsize;
17 | 	size_t			item_len;
18 | };
19 | 
20 | #ifdef DEBUG
21 | extern void __mempool_alloc_debug_check(struct mempool *m, void *item);
22 | extern void __mempool_free_debug_check(struct mempool *m, void *item);
23 | #else /* DEBUG */
24 | static inline void __mempool_alloc_debug_check(struct mempool *m, void *item) {}
25 | static inline void __mempool_free_debug_check(struct mempool *m, void *item) {}
26 | #endif /* DEBUG */
27 | 
28 | static inline bool mempool_member(struct mempool *m, void *addr)
29 | {
30 | 	return addr >= m->buf && addr < m->buf + m->len;
31 | }
32 | 
33 | /**
34 |  * mempool_alloc - takes the next free item out of the pool
35 |  * @m: the memory pool to allocate from
36 |  *
37 |  * Returns an item, or NULL if every item is already allocated.
38 |  */
39 | static inline void *mempool_alloc(struct mempool *m)
40 | {
41 | 	size_t idx = m->allocated;
42 | 	void *obj;
43 | 
44 | 	if (unlikely(idx >= m->capacity))
45 | 		return NULL;
46 | 	obj = m->free_items[idx];
47 | 	m->allocated = idx + 1;
48 | 	__mempool_alloc_debug_check(m, obj);
49 | 	return obj;
50 | }
48 | 
49 | /**
50 |  * mempool_free - returns an item to the pool
51 |  * @m: the memory pool the item was allocated from
52 |  * @item: the item to return
53 |  */
54 | static inline void mempool_free(struct mempool *m, void *item)
55 | {
56 | 	__mempool_free_debug_check(m, item);
57 | 	/* catch a free on an empty pool before the index wraps around */
58 | 	assert(m->allocated > 0);
59 | 	m->free_items[--m->allocated] = item;
60 | 	assert(m->allocated <= m->capacity);
61 | }
60 | 
61 | extern int mempool_create(struct mempool *m, void *buf, size_t len,
62 | 			  size_t pgsize, size_t item_len);
63 | extern void mempool_destroy(struct mempool *m);
64 | 
65 | extern struct tcache *mempool_create_tcache(struct mempool *m, const char *name,
66 | 					    unsigned int mag_size);
67 | 


--------------------------------------------------------------------------------
/inc/base/pci.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * pci.h - PCI bus support
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | #include <base/kref.h>
 9 | 
10 | struct pci_bar {
11 | 	uint64_t start;	/* the start address, or zero if no resource */
12 | 	uint64_t len;	/* the length of the resource */
13 | 	uint64_t flags; /* Linux resource flags */
14 | };
15 | 
16 | /* NOTE: these are the same as the Linux PCI sysfs resource flags */
17 | #define PCI_BAR_IO		0x00000100
18 | #define PCI_BAR_MEM		0x00000200
19 | #define PCI_BAR_PREFETCH	0x00002000 /* typically WC memory */
20 | #define PCI_BAR_READONLY	0x00004000 /* typically option ROMs */
21 | #define PCI_MAX_BARS		7
22 | 
23 | struct pci_addr {
24 | 	uint16_t domain;
25 | 	uint8_t bus;
26 | 	uint8_t slot;
27 | 	uint8_t func;
28 | } __packed;
29 | 
30 | extern int pci_str_to_addr(const char *str, struct pci_addr *addr);
31 | 
32 | struct pci_dev {
33 | 	struct pci_addr addr;
34 | 	struct kref ref;
35 | 
36 | 	uint16_t vendor_id;
37 | 	uint16_t device_id;
38 | 	uint16_t subsystem_vendor_id;
39 | 	uint16_t subsystem_device_id;
40 | 
41 | 	struct pci_bar bars[PCI_MAX_BARS];
42 | 	int numa_node;
43 | 	int max_vfs;
44 | };
45 | 
46 | extern struct pci_dev *pci_alloc_dev(const struct pci_addr *addr);
47 | extern void pci_release_dev(struct kref *ref);
48 | extern struct pci_bar *pci_find_mem_bar(struct pci_dev *dev, int count);
49 | extern void *pci_map_mem_bar(struct pci_dev *dev, struct pci_bar *bar, bool wc);
50 | extern void pci_unmap_mem_bar(struct pci_bar *bar, void *vaddr);
51 | 
52 | /**
53 |  * pci_dev_get - increments the PCI device refcount
54 |  * @dev: the PCI device
55 |  *
56 |  * Returns the device.
57 |  */
58 | static inline struct pci_dev *pci_dev_get(struct pci_dev *dev)
59 | {
60 | 	kref_get(&dev->ref);
61 | 	return dev;
62 | }
63 | 
64 | /**
65 |  * pci_dev_put - decrements the PCI device refcount, freeing at zero
66 |  * @dev: the PCI device
67 |  */
68 | static inline void pci_dev_put(struct pci_dev *dev)
69 | {
70 | 	kref_put(&dev->ref, pci_release_dev);
71 | }
72 | 


--------------------------------------------------------------------------------
/inc/base/signal.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * signal.h - support for setting up signal handlers without using glibc
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <signal.h>
 8 | 
 9 | extern int base_sigaction(int sig, const struct sigaction *act,
10 |                           struct sigaction *oact);
11 | extern int base_sigaction_full(int sig, const struct sigaction *act,
12 |                                struct sigaction *oact);


--------------------------------------------------------------------------------
/inc/base/slab.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * slab.h - a SLAB allocator
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/list.h>
 9 | #include <base/thread.h>
10 | #include <base/limits.h>
11 | 
12 | /* forward declarations */
13 | struct slab_hdr;
14 | struct slab_node;
15 | struct tcache;
16 | 
17 | 
18 | /*
19 |  * slab support
20 |  */
21 | 
22 | #define SLAB_CHUNK_SIZE 	8	/* NOTE(review): unit and use are not visible in this header */
23 | #define SLAB_MIN_SIZE		16	/* also the alignment promised by __slab_malloc */
24 | 
25 | /* function attributes for functions that return freshly allocated slab items */
26 | #define __slab_malloc		__malloc __assume_aligned(SLAB_MIN_SIZE)
27 | 
28 | /* A slab node holds the slab's internal state for one NUMA node. */
29 | struct slab_node {
30 | 	size_t			size;
31 | 	int			numa_node;
32 | 	int			offset;
33 | 	int			flags;	/* NOTE(review): presumably SLAB_FLAG_* bits; confirm */
34 | 	int			nr_elems;
35 | 	spinlock_t		page_lock;	/* NOTE(review): presumably guards the page state below; confirm */
36 | 
37 | 	/* slab pages */
38 | 	off_t			pg_off;
39 | 	struct page		*cur_pg;
40 | 	struct list_head	full_list;
41 | 	struct list_head	partial_list;
42 | 	int			nr_pages;
43 | };
44 | 
45 | struct slab {
46 | 	const char		*name;
47 | 	size_t			size;
48 | 	struct list_node	link;
49 | 	struct slab_node	*nodes[NNUMA];	/* per-NUMA-node state, NNUMA slots */
50 | } __aligned(CACHE_LINE_SIZE);
51 | 
52 | /* force the slab to be backed with large pages */
53 | #define SLAB_FLAG_LGPAGE	BIT(0)
54 | /* false sharing is acceptable (in exchange for less internal fragmentation) */
55 | #define SLAB_FLAG_FALSE_OKAY	BIT(1)
56 | /* the slab manages 4KB pages (internal use only) */
57 | #define SLAB_FLAG_PAGES		BIT(2)
58 | 
59 | extern int slab_create(struct slab *s, const char *name, size_t size, int flags); /* NOTE(review): @flags presumably takes SLAB_FLAG_* bits; confirm */
60 | extern void slab_destroy(struct slab *s);
61 | extern int slab_reclaim(struct slab *s);
62 | extern void *slab_alloc_on_node(struct slab *s, int numa_node) __slab_malloc; /* result carries the __slab_malloc alignment promise */
63 | extern void slab_free(struct slab *s, void *item);
64 | extern void slab_print_usage(void);
65 | 
66 | /**
67 |  * slab_alloc - allocates an item from the caller's NUMA node
68 |  * @s: the slab to allocate from; returns an item or NULL if out of memory
69 |  */
70 | static __always_inline void *slab_alloc(struct slab *s)
71 | {
72 | 	int local_node = this_numa_node();
73 | 
74 | 	return slab_alloc_on_node(s, local_node);
75 | }
76 | 
77 | struct tcache *slab_create_tcache(struct slab *s, unsigned int mag_size); /* NOTE(review): @mag_size presumably is the magazine capacity; confirm */
78 | 


--------------------------------------------------------------------------------
/inc/base/stat.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * stat.h - statistics counter support
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/thread.h>
 9 | #include <base/limits.h>
10 | #include <base/list.h>
11 | 
12 | 
13 | /*
14 |  * Generic stat counter API
15 |  */
16 | 
17 | struct stat_entry;
18 | typedef uint64_t (*stat_collect_fn_t)(struct stat_entry *e, unsigned long data);
19 | 
20 | struct stat_entry {
21 | 	const char		*name;
22 | 	stat_collect_fn_t	handler;	/* collector callback for this stat */
23 | 	unsigned long		data;	/* opaque word; the stat_register_*() helpers store a pointer here */
24 | 	struct list_node	link;
25 | };
26 | 
27 | extern int stat_register(struct stat_entry *entry); /* the stat_register_*() helpers document 0 as success */
28 | extern void stat_unregister(struct stat_entry *entry);
29 | extern uint64_t stat_collect(struct stat_entry *entry);
30 | 
31 | struct stat_result {
32 | 	const char		*name;
33 | 	uint64_t		val;
34 | };
35 | 
36 | extern int stat_collect_all(struct stat_result *results_out, int capacity); /* NOTE(review): meaning of the int result (count vs. status) is not visible here */
37 | extern void stat_print_all(void);
38 | 
39 | 
40 | /*
41 |  * Some common stat collectors
42 |  */
43 | 
44 | extern uint64_t __stat_var_collect(struct stat_entry *e, unsigned long data); /* installed by stat_register_var() */
45 | extern uint64_t __stat_perthread_var_collect(struct stat_entry *e,
46 | 					     unsigned long data); /* installed by stat_register_perthread_var() */
47 | 
48 | /**
49 |  * stat_register_var - registers a stat whose count lives in a uint64_t
50 |  * @entry: the stat entry struct to fill in and register
51 |  * @name: a human-readable name for the stat
52 |  * @val: pointer to the uint64_t that stores the count
53 |  * Returns 0 if successful, otherwise fail.
54 |  */
55 | static inline int
56 | stat_register_var(struct stat_entry *entry, const char *name, uint64_t *val)
57 | {
58 | 	/* the counter's address travels in the entry's opaque data word */
59 | 	entry->data = (unsigned long)val;
60 | 	entry->handler = __stat_var_collect;
61 | 	entry->name = name;
62 | 	return stat_register(entry);
63 | }
64 | 
65 | /**
66 |  * stat_register_perthread_var - registers a stat kept in a perthread uint64_t
67 |  * @entry: the stat entry struct to fill in and register
68 |  * @name: a human-readable name for the stat
69 |  * @val: the perthread uint64_t value that stores the count
70 |  * Returns 0 if successful, otherwise fail.
71 |  */
72 | static inline int
73 | stat_register_perthread_var(struct stat_entry *entry, const char *name,
74 | 			    uint64_t __perthread *val)
75 | {
76 | 	/* the perthread address travels in the entry's opaque data word */
77 | 	entry->data = (__force unsigned long)val;
78 | 	entry->handler = __stat_perthread_var_collect;
79 | 	entry->name = name;
80 | 	return stat_register(entry);
81 | }
82 | 


--------------------------------------------------------------------------------
/inc/base/syscall.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * syscall.h - support for common syscalls in the base library
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <stddef.h>
 8 | #include <sys/types.h>
 9 | 
10 | extern const char base_syscall_start[]; /* NOTE(review): start/end presumably bound the syscall stub code; confirm */
11 | extern const char base_syscall_end[];
12 | 
13 | extern void *syscall_mmap(void *addr, size_t length, int prot, int flags,
14 | 	                      int fd, off_t offset); /* argument list matches mmap(2) */
15 | extern long syscall_mbind(void *start, size_t len, int mode,
16 | 	                      const unsigned long *nmask, unsigned long maxnode,
17 | 	                      unsigned flags); /* argument list matches mbind(2) */
18 | extern void syscall_rt_sigreturn(void);
19 | extern int syscall_ioctl(int fd, unsigned long int request, void *arg);
20 | extern int syscall_madvise(void *addr, size_t length, int advice);
21 | extern int syscall_mprotect(void *addr, size_t len, int prot);


--------------------------------------------------------------------------------
/inc/base/sysfs.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * sysfs.h - utilities for accessing sysfs
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | 
 9 | #define SYSFS_PCI_PATH		"/sys/bus/pci/devices"
10 | #define SYSFS_CPU_TOPOLOGY_PATH	"/sys/devices/system/cpu/cpu%d/topology"	/* format string; %d is the cpu index */
11 | #define SYSFS_NODE_PATH		"/sys/devices/system/node/node%d"	/* format string; %d is the node index */
12 | 
13 | extern int sysfs_parse_val(const char *path, uint64_t *val_out);
14 | extern int sysfs_parse_bitlist(const char *path, unsigned long *bits,
15 | 			       int nbits);


--------------------------------------------------------------------------------
/inc/base/time.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * time.h - timekeeping utilities
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | #include <asm/ops.h>
 9 | 
10 | #define ONE_SECOND	1000000	/* all three constants are expressed in microseconds */
11 | #define ONE_MS		1000
12 | #define ONE_US		1
13 | 
14 | extern int cycles_per_us;	/* divisor microtime() uses to turn TSC cycles into microseconds */
15 | extern uint64_t start_tsc; 	/* TSC value that microtime() measures from */
16 | 
17 | /** microtime - microseconds since process start; cheaper than clock_gettime() */
18 | static inline uint64_t microtime(void)
19 | {
20 | 	uint64_t elapsed_cycles = rdtsc() - start_tsc;
21 | 
22 | 	/* convert TSC cycles to microseconds */
23 | 	return elapsed_cycles / cycles_per_us;
24 | }
25 | 
26 | extern void __time_delay_us(uint64_t us);
27 | 
28 | /**
29 |  * delay_us - delays the caller for @us microseconds via __time_delay_us()
30 |  * @us: the number of microseconds
31 |  */
32 | static inline void delay_us(uint64_t us)
33 | {
34 | 	__time_delay_us(us);
35 | }
36 | 
37 | /**
38 |  * delay_ms - delays the caller for @ms milliseconds
39 |  * @ms: the number of milliseconds; ms * ONE_MS wraps if it exceeds 64 bits
40 |  */
41 | static inline void delay_ms(uint64_t ms)
42 | {
43 | 	/* TODO: yield instead of spin */
44 | 	__time_delay_us(ms * ONE_MS);
45 | }
46 | 


--------------------------------------------------------------------------------
/inc/base/types.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * types.h - primitive type definitions
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <stdbool.h>
 8 | #include <asm/cpu.h>
 9 | 
10 | typedef unsigned char uint8_t;	/* fixed-width names built from the basic C integer types */
11 | typedef unsigned short uint16_t;
12 | typedef unsigned int uint32_t;
13 | 
14 | typedef signed char int8_t;
15 | typedef signed short int16_t;
16 | typedef signed int int32_t;
17 | 
18 | #ifndef __WORD_SIZE
19 | #error __WORD_SIZE is undefined
20 | #endif
21 | 
22 | #if __WORD_SIZE == __64BIT_WORDS
23 | 
24 | typedef unsigned long uint64_t;
25 | typedef signed long int64_t;
26 | 
27 | #else /* __WORD_SIZE != __64BIT_WORDS */
28 | 
29 | typedef unsigned long long uint64_t;
30 | typedef signed long long int64_t;
31 | 
32 | #endif /* __WORD_SIZE == __64BIT_WORDS */
33 | 
34 | typedef unsigned long	uintptr_t;	/* these five are long-based regardless of __WORD_SIZE */
35 | typedef long		intptr_t;
36 | typedef long		off_t;
37 | typedef unsigned long	size_t;
38 | typedef long		ssize_t;
39 | 
40 | typedef struct {
41 | 	volatile int locked;	/* the lock word */
42 | } spinlock_t;
43 | 
44 | typedef struct {
45 | 	volatile int cnt;	/* int-wide counter */
46 | } atomic_t;
47 | 
48 | typedef struct {
49 | 	volatile long cnt;	/* long-wide counter */
50 | } atomic64_t;
51 | 


--------------------------------------------------------------------------------
/inc/iokernel/directpath.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * directpath.h - definitions for directpath structures
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <sys/types.h>
 8 | 
 9 | #include <iokernel/shm.h>
10 | 
11 | #define DIRECTPATH_STRIDE_RQ_NUM_DESC   128
12 | #define DIRECTPATH_STRIDE_MODE_BUF_SZ   16384
13 | #define DIRECTPATH_STRIDE_SIZE          256
14 | 
15 | #define DIRECTPATH_NUM_STRIDES \
16 |  (DIRECTPATH_STRIDE_MODE_BUF_SZ / DIRECTPATH_STRIDE_SIZE) /* 16384 / 256 = 64 */
17 | 
18 | #define DIRECTPATH_STRIDE_SHIFT (__builtin_ctz(DIRECTPATH_NUM_STRIDES)) /* ctz(64) = 6 */
19 | 
20 | #define DIRECTPATH_TOTAL_RX_EL \
21 |  (DIRECTPATH_NUM_STRIDES * DIRECTPATH_STRIDE_RQ_NUM_DESC) /* 64 * 128 = 8192 */
22 | #define DIRECTPATH_STRIDE_REFILL_THRESH_HI \
23 |  (DIRECTPATH_TOTAL_RX_EL * 1 / 4) /* 8192 / 4 = 2048 */
24 | 
25 | #define DIRECTPATH_STRIDE_RX_BUF_POOL_SZ \
26 |     (2 * DIRECTPATH_STRIDE_RQ_NUM_DESC * DIRECTPATH_STRIDE_MODE_BUF_SZ) /* 2 * 128 * 16384 = 4194304 */
27 | 
28 | BUILD_ASSERT(DIRECTPATH_STRIDE_MODE_BUF_SZ % DIRECTPATH_STRIDE_SIZE == 0);
29 | BUILD_ASSERT(PGSIZE_2MB % DIRECTPATH_STRIDE_MODE_BUF_SZ == 0);
30 | BUILD_ASSERT(DIRECTPATH_STRIDE_SIZE >= 64);
31 | 
32 | struct directpath_ring_q_spec {
33 |     shmptr_t buf;	/* NOTE(review): presumably the ring buffer, as a shared-memory pointer; confirm */
34 |     shmptr_t dbrec;	/* NOTE(review): presumably the doorbell record; confirm */
35 |     uint64_t nr_entries;
36 |     uint32_t stride;
37 | };
38 | 
39 | struct directpath_queue_spec {
40 |     uint32_t sqn;
41 |     uint32_t uarn;
42 |     uint32_t uar_offset;
43 |     struct directpath_ring_q_spec rx_wq;	/* four rings per queue: rx/tx, each with a wq and a cq */
44 |     struct directpath_ring_q_spec rx_cq;
45 |     struct directpath_ring_q_spec tx_wq;
46 |     struct directpath_ring_q_spec tx_cq;
47 | };
48 | 
49 | struct directpath_spec {
50 |     uint32_t mr;
51 |     size_t va_base;
52 |     size_t memfd_region_size;
53 | 
54 |     /* bar map */
55 |     off_t offs;
56 |     size_t bar_map_size;
57 | 
58 |     struct directpath_ring_q_spec rmp;
59 | 
60 |     shmptr_t buf_region;
61 |     size_t rx_buf_region_size;
62 |     size_t tx_buf_region_size;
63 | 
64 |     struct directpath_queue_spec qs[];	/* flexible array member; the element count is not stored in this struct */
65 | };
66 | 


--------------------------------------------------------------------------------
/inc/net/arp.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * arp.h - Address Resolution Protocol (RFC 826, RFC 903)
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <net/ethernet.h>
 9 | #include <net/ip.h>
10 | 
11 | struct arp_hdr {
12 | 	uint16_t htype;		/* hardware type, see ARP_HTYPE_* */
13 | 	uint16_t ptype;		/* the ETHERTYPE */
14 | 	uint8_t  hlen;		/* hw addr length in bytes */
15 | 	uint8_t  plen;		/* protocol addr length in bytes */
16 | 	uint16_t op;		/* operation, see ARP_OP_* */
17 | 
18 | 	/*
19 | 	 * Variable length fields continue as follows:
20 | 	 *    sender hw addr: hlen bytes
21 | 	 *    sender protocol addr: plen bytes
22 | 	 *    target hw addr: hlen bytes
23 | 	 *    target protocol addr: plen bytes
24 | 	 */
25 | } __packed;
26 | 
27 | struct arp_hdr_ethip {
28 | 	struct eth_addr	sender_mac;	/* NOTE(review): looks like the variable part of arp_hdr for Ethernet + IPv4; confirm */
29 | 	uint32_t	sender_ip;
30 | 	struct eth_addr	target_mac;
31 | 	uint32_t	target_ip;
32 | } __packed;
33 | 
34 | #define ARP_HTYPE_ETHER		1	/* ethernet */
35 | #define ARP_HTYPE_IEEE802	6	/* token-ring */
36 | #define ARP_HTYPE_ARCNET	7	/* arcnet */
37 | #define ARP_HTYPE_FRELAY	16	/* frame relay */
38 | #define ARP_HTYPE_IEEE1394	24	/* firewire */
39 | #define ARP_HTYPE_INFINIBAND	32	/* infiniband */
40 | 
41 | enum {
42 | 	ARP_OP_REQUEST = 1,	/* request hw addr given protocol addr */
43 | 	ARP_OP_REPLY = 2,	/* response hw addr given protocol addr */
44 | 	ARP_OP_REVREQUEST = 3,	/* request protocol addr given hw addr */
45 | 	ARP_OP_REVREPLY = 4,	/* response protocol addr given hw addr */
46 | };
47 | 


--------------------------------------------------------------------------------
/inc/net/mbufq.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * mbufq.h - singly-linked queue of MBUFs
  3 |  */
  4 | 
  5 | #pragma once
  6 | 
  7 | #include <net/mbuf.h>
  8 | 
  9 | struct mbuf;
 10 | 
 11 | struct mbufq {
 12 | 	struct mbuf *head, *tail;	/* empty iff head == NULL; tail is only meaningful while non-empty */
 13 | };
 14 | 
 15 | /**
 16 |  * mbufq_push_tail - appends an mbuf at the tail of the queue
 17 |  * @q: the mbuf queue
 18 |  * @m: the mbuf to append (its next pointer is overwritten)
 19 |  */
 20 | static inline void mbufq_push_tail(struct mbufq *q, struct mbuf *m)
 21 | {
 22 | 	m->next = NULL;
 23 | 	if (q->head)
 24 | 		q->tail->next = m;	/* link after the current last element */
 25 | 	else
 26 | 		q->head = m;		/* empty queue: @m is also the head */
 27 | 
 28 | 	q->tail = m;
 29 | }
 30 | 
 31 | /**
 32 |  * mbufq_pop_head - removes and returns the mbuf at the head of the queue
 33 |  * @q: the mbuf queue
 34 |  *
 35 |  * Returns an mbuf or NULL if the queue is empty.
 36 |  */
 37 | static inline struct mbuf *mbufq_pop_head(struct mbufq *q)
 38 | {
 39 | 	struct mbuf *first = q->head;
 40 | 	/* advance only when there is something to remove; tail is left as is */
 41 | 	if (first)
 42 | 		q->head = first->next;
 43 | 	return first;
 44 | }
 45 | 
 46 | /**
 47 |  * mbufq_peak_head - peeks at the head of the queue without popping it
 48 |  * @q: the mbuf queue
 49 |  *
 50 |  * Returns the head mbuf (still queued) or NULL if the queue is empty.
 51 |  */
 52 | static inline struct mbuf *mbufq_peak_head(struct mbufq *q)
 53 | {
 54 | 	return q->head;
 55 | }
 56 | 
 57 | /**
 58 |  * mbufq_merge_to_tail - appends all of @src's mbufs to the end of @dst
 59 |  * @dst: receives the mbufs; @src: the donor queue, left empty
 60 |  */
 61 | static inline void mbufq_merge_to_tail(struct mbufq *dst, struct mbufq *src)
 62 | {
 63 | 	struct mbuf *first = src->head;
 64 | 	if (!first)
 65 | 		return;
 66 | 	if (dst->head)
 67 | 		dst->tail->next = first;
 68 | 	else
 69 | 		dst->head = first;
 70 | 	dst->tail = src->tail;
 71 | 	src->head = NULL;
 72 | }
 73 | 
 74 | /**
 75 |  * mbufq_empty - returns true if the queue is empty (head is NULL)
 76 |  */
 77 | static inline bool mbufq_empty(struct mbufq *q)
 78 | {
 79 | 	return q->head == NULL;
 80 | }
 81 | 
 82 | /**
 83 |  * mbufq_release - frees all the mbufs in the queue
 84 |  * @q: the queue to drain
 85 |  *
 86 |  * Pops mbufs one at a time and hands each to mbuf_free() until
 87 |  * mbufq_pop_head() reports that nothing is left.
 88 |  */
 89 | static inline void mbufq_release(struct mbufq *q)
 90 | {
 91 | 	struct mbuf *cur;
 92 | 
 93 | 	while ((cur = mbufq_pop_head(q)) != NULL)
 94 | 		mbuf_free(cur);
 95 | }
 96 | 
 97 | /**
 98 |  * mbufq_init - initializes a queue to the empty state
 99 |  * @q: the mbuf queue to initialize (only head is set; tail is left as is)
100 |  */
101 | static inline void mbufq_init(struct mbufq *q)
102 | {
103 | 	q->head = NULL;
104 | }
105 | 


--------------------------------------------------------------------------------
/inc/net/ping.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <sys/time.h>
 4 | 
 5 | #include <net/icmp.h>
 6 | 
 7 | struct ping_payload {
 8 | 	struct timeval tx_time;	/* NOTE(review): presumably the send timestamp carried in the echo payload; confirm */
 9 | };
10 | 
11 | int net_ping_init(void); /* (void) makes this a prototype; an empty list leaves arguments unchecked */
12 | void net_send_ping(uint16_t seq_num, uint32_t daddr);
13 | void net_recv_ping(const struct ping_payload *payload,
14 | 		const struct icmp_pkt *icmp_pkt);
15 | 


--------------------------------------------------------------------------------
/inc/net/tcp.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * tcp.h - Transmission Control Protocol (TCP) definitions
 3 |  *
 4 |  * Based on Freebsd, BSD licensed.
 5 |  */
 6 | 
 7 | #pragma once
 8 | 
 9 | #include <base/stddef.h>
10 | 
11 | typedef	uint32_t tcp_seq;
12 | 
13 | /*
14 |  * TCP header.
15 |  * Per RFC 793, September, 1981.
16 |  */
17 | struct tcp_hdr {
18 | 	uint16_t	sport;		/* source port */
19 | 	uint16_t	dport;		/* destination port */
20 | 	tcp_seq		seq;		/* sequence number */
21 | 	tcp_seq		ack;		/* acknowledgement number */
22 | #if __BYTE_ORDER == __LITTLE_ENDIAN
23 | 	uint8_t		x2:4,		/* (unused) */
24 | 			off:4;		/* data offset */
25 | #endif
26 | #if __BYTE_ORDER == __BIG_ENDIAN
27 | 	uint8_t		off:4,		/* data offset */
28 | 			x2:4;		/* (unused) */
29 | #endif
30 | 	uint8_t		flags;		/* bitwise OR of the TCP_* flag bits below */
31 | #define	TCP_FIN		0x01
32 | #define	TCP_SYN		0x02
33 | #define	TCP_RST		0x04
34 | #define	TCP_PUSH	0x08
35 | #define	TCP_ACK		0x10
36 | #define	TCP_URG		0x20
37 | #define	TCP_ECE		0x40
38 | #define	TCP_CWR		0x80
39 | #define	TCP_FLAGS \
40 | 	(TCP_FIN|TCP_SYN|TCP_RST|TCP_PUSH|TCP_ACK|TCP_URG|TCP_ECE|TCP_CWR)
41 | #define	PRINT_TCP_FLAGS	"\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR" /* NOTE(review): looks like a BSD "%b" bit-name string; confirm */
42 | 
43 | 	uint16_t	win;		/* window */
44 | 	uint16_t	sum;		/* checksum */
45 | 	uint16_t	urp;		/* urgent pointer */
46 | };
47 | 
48 | /*
49 |  * TCP options (option kind values, followed by option lengths).
50 |  */
51 | #define TCP_OPT_EOL	0 /* end of options */
52 | #define TCP_OPT_NOP	1 /* used for padding */
53 | #define TCP_OPT_MSS	2 /* maximum segment size negotiation */
54 | #define TCP_OPT_WSCALE	3 /* window scaling factor */
55 | 
56 | #define TCP_OLEN_MSS	4 /* NOTE(review): presumably the MSS option's total length in bytes; confirm */
57 | #define TCP_OLEN_WSCALE	3 /* NOTE(review): presumably the WSCALE option's total length in bytes; confirm */
58 | 


--------------------------------------------------------------------------------
/inc/net/udp.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * udp.h - User Datagram Protocol
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | 
 9 | struct udp_hdr {
10 | 	uint16_t src_port;	/* NOTE(review): byte order of these fields is not stated here; confirm at the call sites */
11 | 	uint16_t dst_port;
12 | 	uint16_t len;
13 | 	uint16_t chksum;
14 | };
15 | 


--------------------------------------------------------------------------------
/inc/runtime/gc.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <runtime/thread.h>
 4 | 
 5 | struct kthread;
 6 | 
 7 | // #define GC 1 (left disabled here; the stub branch below is used unless GC is defined elsewhere)
 8 | 
 9 | #ifdef GC
10 | 
11 | /* External API */
12 | typedef void (*stack_bounds_cb)(uint64_t bottom, uint64_t top);
13 | extern void gc_stop_world(void);
14 | extern void gc_start_world(void);
15 | 
16 | /* reports each active stack to discover_cb as a (bottom, top) pair */
17 | extern void gc_discover_all_stacks(stack_bounds_cb discover_cb);
18 | 
19 | /* Internal API (get_gc_gen() exists only in this branch; there is no stub) */
20 | extern volatile bool world_stopped;
21 | extern volatile uint64_t gc_gen;
22 | 
23 | static inline bool is_world_stopped(void)
24 | {
25 | 	return ACCESS_ONCE(world_stopped);
26 | }
27 | 
28 | static inline uint64_t get_gc_gen(void)
29 | {
30 | 	return ACCESS_ONCE(gc_gen);
31 | }
32 | 
33 | extern int gc_register_thread(thread_t *th);
34 | extern int gc_remove_thread(thread_t *th);
35 | extern void gc_kthread_report(struct kthread *k);
36 | 
37 | #else /* !GC: no-op stubs; the int-returning ones return 0 */
38 | static inline int gc_register_thread(thread_t *th)
39 | {
40 | 	return 0;
41 | }
42 | static inline int gc_remove_thread(thread_t *th)
43 | {
44 | 	return 0;
45 | }
46 | static inline void gc_kthread_report(struct kthread *k) {}
47 | static inline bool is_world_stopped(void)
48 | {
49 | 	return false;
50 | }
51 | 
52 | #endif /* GC */
53 | 


--------------------------------------------------------------------------------
/inc/runtime/net.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * net.h - shared network definitions
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/types.h>
 8 | 
 9 | struct netaddr {
10 | 	uint32_t ip;	/* NOTE(review): byte order is not stated in this header; confirm */
11 | 	uint16_t port;
12 | };
13 | 
14 | extern int str_to_netaddr(const char *str, struct netaddr *addr); /* NOTE(review): accepted string format is not visible here */
15 | 


--------------------------------------------------------------------------------
/inc/runtime/poll.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * poll.h - support for event polling (similar to select/epoll/poll, etc.)
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/list.h>
 9 | #include <runtime/thread.h>
10 | #include <runtime/sync.h>
11 | 
12 | typedef struct poll_waiter {
13 | 	spinlock_t		lock;
14 | 	struct list_head	triggered;	/* NOTE(review): presumably the fired triggers, linked via poll_trigger.link; confirm */
15 | 	thread_t		*waiting_th;
16 | } poll_waiter_t;
17 | 
18 | typedef struct poll_trigger {
19 | 	struct list_node	link;
20 | 	struct poll_waiter	*waiter;	/* NULL after poll_trigger_init() */
21 | 	bool			triggered;	/* false after poll_trigger_init() */
22 | 	unsigned long		data;
23 | } poll_trigger_t;
24 | 
25 | 
26 | /*
27 |  * Waiter API
28 |  */
29 | 
30 | extern void poll_init(poll_waiter_t *w);
31 | extern void poll_arm(poll_waiter_t *w, poll_trigger_t *t, unsigned long data);
32 | extern void poll_disarm(poll_trigger_t *t);
33 | extern unsigned long poll_wait(poll_waiter_t *w); /* NOTE(review): presumably returns the @data given to poll_arm() for the fired trigger; confirm */
34 | 
35 | 
36 | /*
37 |  * Trigger API
38 |  */
39 | 
40 | /**
41 |  * poll_trigger_init - resets a trigger's waiter and triggered state
42 |  * @t: the trigger to initialize (link and data are left untouched)
43 |  */
44 | static inline void poll_trigger_init(poll_trigger_t *t)
45 | {
46 | 	t->triggered = false;
47 | 	t->waiter = NULL;
48 | }
49 | 
50 | extern void poll_trigger(poll_waiter_t *w, poll_trigger_t *t); /* NOTE(review): presumably fires @t on waiter @w; confirm */
51 | 


--------------------------------------------------------------------------------
/inc/runtime/preempt.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * preempt.h - support for kthread preemption
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/thread.h>
 9 | 
10 | DECLARE_PERTHREAD(unsigned int, preempt_cnt);	/* nesting count, with PREEMPT_NOT_PENDING kept in bit 31 */
11 | DECLARE_PERTHREAD(void *, uintr_stack);
12 | 
13 | extern void preempt(void);	/* called by preempt_enable() when the count reaches zero */
14 | extern void uintr_asm_return(void);
15 | 
16 | extern size_t xsave_max_size;
17 | extern size_t xsave_features;
18 | 
19 | /* set whenever there is _not_ a pending preemption (1U: 1 << 31 overflows int) */
20 | #define PREEMPT_NOT_PENDING	(1U << 31)
21 | 
22 | /**
23 |  * preempt_disable - disables preemption by adding 1 to preempt_cnt
24 |  *
25 |  * Can be nested.
26 |  */
27 | static __always_inline __nofp void preempt_disable(void)
28 | {
29 | 	asm volatile("addl $1, %%gs:__perthread_preempt_cnt(%%rip)" ::: "memory", "cc");
30 | 	barrier();
31 | }
32 | 
33 | /**
34 |  * preempt_enable_nocheck - decrements preempt_cnt without calling preempt()
35 |  *
36 |  * Can be nested.
37 |  */
38 | static inline void preempt_enable_nocheck(void)
39 | {
40 | 	barrier();
41 | 	perthread_decr(preempt_cnt);
42 | }
43 | 
44 | /**
45 |  * preempt_enable - reenables preemption, calling preempt() if the count hits 0
46 |  *
47 |  * Can be nested. A count of 0 also means the PREEMPT_NOT_PENDING bit is clear.
48 |  */
49 | static __always_inline __nofp void preempt_enable(void)
50 | {
51 | #ifndef __GCC_ASM_FLAG_OUTPUTS__ /* no flag outputs: decrement, then re-read the count */
52 | 	preempt_enable_nocheck();
53 | 	if (unlikely(perthread_read(preempt_cnt) == 0))
54 | 		preempt();
55 | #else /* flag outputs: take the zero test from the subl itself */
56 | 	int zero;
57 | 	barrier();
58 | 	asm volatile("subl $1, %%gs:__perthread_preempt_cnt(%%rip)"
59 | 		     : "=@ccz" (zero) :: "memory", "cc");
60 | 	if (unlikely(zero))
61 | 		preempt();
62 | #endif
63 | }
64 | 
65 | /**
66 |  * preempt_needed - returns true if the PREEMPT_NOT_PENDING bit is clear
67 |  */
68 | static inline bool preempt_needed(void)
69 | {
70 | 	return (perthread_read(preempt_cnt) & PREEMPT_NOT_PENDING) == 0;
71 | }
72 | 
73 | /**
74 |  * preempt_enabled - returns true if the nesting count (flag bit masked) is 0
75 |  */
76 | static __always_inline __nofp bool preempt_enabled(void)
77 | {
78 | 	return (perthread_read(preempt_cnt) & ~PREEMPT_NOT_PENDING) == 0;
79 | }
80 | 
81 | /**
82 |  * assert_preempt_disabled - asserts that preempt_enabled() is false
83 |  */
84 | static inline void assert_preempt_disabled(void)
85 | {
86 | 	assert(!preempt_enabled());
87 | }
88 | 
89 | /**
90 |  * clear_preempt_needed - marks no preemption as pending by OR-ing the
91 |  * PREEMPT_NOT_PENDING bit (written as the literal 0x80000000) into preempt_cnt
92 |  */
93 | static inline void clear_preempt_needed(void)
94 | {
95 | 	BUILD_ASSERT(PREEMPT_NOT_PENDING == 0x80000000);
96 | 	perthread_ori(preempt_cnt, 0x80000000);
97 | }
98 | 


--------------------------------------------------------------------------------
/inc/runtime/rculist.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * rculist.h - support for RCU list data structures
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <runtime/rcu.h>
 8 | 
 9 | struct rcu_hlist_node {
10 | 	struct rcu_hlist_node __rcu *next;
11 | 	struct rcu_hlist_node * __rcu *pprev;	/* address of the pointer that points at this node */
12 | };
13 | 
14 | struct rcu_hlist_head {
15 | 	struct rcu_hlist_node __rcu *head;	/* NULL when the list is empty */
16 | };
17 | 
18 | /**
19 |  * rcu_hlist_init_head - initializes an RCU hlist to empty (head = NULL)
20 |  * @h: the list head
21 |  */
22 | static inline void rcu_hlist_init_head(struct rcu_hlist_head *h)
23 | {
24 | 	RCU_INIT_POINTER(h->head, NULL);
25 | }
26 | 
27 | /**
28 |  * rcu_hlist_add_head - adds a node to the head of an RCU hlist
29 |  * @h: the list head
30 |  * @n: the node to add
31 |  */
32 | static inline void rcu_hlist_add_head(struct rcu_hlist_head *h,
33 | 				      struct rcu_hlist_node *n)
34 | {
35 | 	struct rcu_hlist_node *head = h->head;
36 | 	RCU_INIT_POINTER(n->next, head);	/* @n's links are filled in first ... */
37 | 	n->pprev = &h->head;
38 | 	rcu_assign_pointer(h->head, n);	/* ... and only then is @n stored in the head */
39 | 	if (head)
40 | 		head->pprev = &n->next;	/* old first node is now reached through @n */
41 | }
42 | 
43 | /**
44 |  * rcu_hlist_del - removes a node from an RCU hlist
45 |  * @n: the node to remove (its own next/pprev fields are not cleared)
46 |  */
47 | static inline void rcu_hlist_del(struct rcu_hlist_node *n)
48 | {
49 | 	rcu_assign_pointer(*n->pprev, n->next);
50 | 	if (n->next)
51 | 		n->next->pprev = n->pprev;
52 | }
53 | 
54 | /**
55 |  * rcu_hlist_empty - returns true if the RCU hlist is empty
56 |  * @h: the list head
57 |  * @check: proof that a lock is held (passed to rcu_dereference_protected())
58 |  *
59 |  * If @check is false, must be in an RCU critical section.
60 |  */
61 | static inline bool rcu_hlist_empty(struct rcu_hlist_head *h, bool check)
62 | {
63 | 	return rcu_dereference_protected(h->head, check) == NULL;
64 | }
65 | 
66 | #define rcu_hlist_entry(n, type, member) container_of(n, type, member) /* node pointer -> enclosing struct */
67 | 
68 | #define rcu_hlist_for_each(h, pos, check)				\
69 | 	for ((pos) = rcu_dereference_protected((h)->head, check); (pos);\
70 | 	     (pos) = rcu_dereference_protected((pos)->next, check)) /* @pos walks nodes until NULL */
71 | 
72 | #define rcu_hlist_for_each_safe(h, pos, tmp, check)			\
73 | 	for ((pos) = rcu_dereference_protected((h)->head, check); (pos)	\
74 | 	     && ((tmp) = rcu_dereference_protected((pos)->next, check), 1);\
75 | 	     (pos) = (tmp)) /* @tmp holds the successor, read before the loop body runs */
76 | 


--------------------------------------------------------------------------------
/inc/runtime/runtime.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * runtime.h - runtime initialization and metrics
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/time.h>
 9 | #include <runtime/thread.h>
10 | 
11 | 
12 | /* main initialization; an initializer takes no arguments and returns int */
13 | typedef int (*initializer_fn_t)(void);
14 | 
15 | extern int runtime_set_initializers(initializer_fn_t global_fn,
16 | 				    initializer_fn_t perthread_fn,
17 | 				    initializer_fn_t late_fn);
18 | extern int runtime_init(const char *cfgpath, thread_fn_t main_fn, void *arg);
19 | 
20 | 
21 | extern struct runtime_info *runtime_info;	/* read by runtime_queue_us() and runtime_load() */
22 | 
23 | /**
24 |  * runtime_queue_us - returns the us of packet queueing delay + runtime queueing
25 |  * delay, read once from runtime_info->congestion.delay_us
26 |  */
27 | static inline uint64_t runtime_queue_us(void)
28 | {
29 | 	return ACCESS_ONCE(runtime_info->congestion.delay_us);
30 | }
31 | 
32 | /**
33 |  * runtime_load - returns the current CPU usage (number of cores) as a float
34 |  */
35 | static inline float runtime_load(void)
36 | {
37 | 	return ACCESS_ONCE(runtime_info->congestion.load);
38 | }
39 | 
40 | /**
41 |  * runtime_active_cores - returns the number of currently active cores
42 |  * (an atomic read of the runningks counter)
43 |  */
44 | static inline int runtime_active_cores(void)
45 | {
46 | 	extern atomic_t runningks;
47 | 	return atomic_read(&runningks);
48 | }
49 | 
50 | /**
51 |  * runtime_max_cores - returns the maximum number of cores (maxks, as an int)
52 |  *
53 |  * The runtime could be given at most this number of cores by the IOKernel.
54 |  */
55 | static inline int runtime_max_cores(void)
56 | {
57 | 	extern unsigned int maxks;
58 | 	return maxks;
59 | }
60 | 
61 | /**
62 |  * runtime_guaranteed_cores - returns the guaranteed number of cores
63 |  *
64 |  * The IOKernel gives the runtime at least this number of cores (guaranteedks,
65 |  * returned as an int) if it requires them.
66 |  */
67 | static inline int runtime_guaranteed_cores(void)
68 | {
69 | 	extern unsigned int guaranteedks;
70 | 	return guaranteedks;
71 | }
72 | 


--------------------------------------------------------------------------------
/inc/runtime/smalloc.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * smalloc.h - malloc() based on the SLAB and thread-local item caches
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <string.h>
 8 | 
 9 | #include <base/stddef.h>
10 | 
11 | #define __smalloc_attr __malloc __assume_aligned(16)	/* results are declared 16-byte aligned */
12 | 
13 | extern void *smalloc(size_t size) __smalloc_attr;
14 | extern void *__szalloc(size_t size) __smalloc_attr;	/* out-of-line path used by szalloc() */
15 | extern void sfree(void *item);
16 | 
17 | /**
18 |  * szalloc - allocates zero-filled memory
19 |  * @size: the size of the item
20 |  * Returns an item or NULL if out of memory.
21 |  */
22 | static __always_inline void *szalloc(size_t size)
23 | {
24 | 	void *item;
25 | 	/* sizes unknown at compile time take the out-of-line path */
26 | 	if (!__builtin_constant_p(size))
27 | 		return __szalloc(size);
28 | 	item = smalloc(size);
29 | 	if (unlikely(!item))
30 | 		return NULL;
31 | 	memset(item, 0, size);
32 | 	return item;
33 | }
34 | 
35 | /**
36 |  * smalloc_array - allocates a contiguous array of @n items of @size bytes
37 |  * Returns the array, or NULL if out of memory or if n * size overflows.
38 |  */
39 | static __always_inline void *smalloc_array(size_t n, size_t size)
40 | {
41 | 	/* n * size must not wrap, or the array would be silently undersized */
42 | 	if (unlikely(size != 0 && n > (size_t)-1 / size))
43 | 		return NULL;
44 | 	return smalloc(n * size);
45 | }
46 | 


--------------------------------------------------------------------------------
/inc/runtime/storage.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * storage.h - Storage
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/stddef.h>
 8 | 
 9 | extern int storage_write(const void *payload, uint64_t lba, uint32_t lba_count); /* NOTE(review): presumably @lba_count blocks starting at block @lba; confirm */
10 | extern int storage_read(void *dest, uint64_t lba, uint32_t lba_count);
11 | 
12 | 
13 | 
14 | /*
15 |  * storage_block_size - get the size of a block from the nvme device (block_size)
16 |  */
17 | static inline uint32_t storage_block_size(void)
18 | {
19 | 	extern uint32_t block_size;
20 | 	return block_size;
21 | }
22 | 
23 | /*
24 |  * storage_num_blocks - gets the number of blocks from the nvme device (num_blocks)
25 |  */
26 | static inline uint64_t storage_num_blocks(void)
27 | {
28 | 	extern uint64_t num_blocks;
29 | 	return num_blocks;
30 | }
31 | 


--------------------------------------------------------------------------------
/inc/runtime/tcp.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * tcp.h - TCP sockets
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <runtime/net.h>
 8 | #include <sys/uio.h>
 9 | #include <sys/socket.h>
10 | 
11 | struct tcpqueue;	/* opaque here: only pointers to it appear in this header */
12 | typedef struct tcpqueue tcpqueue_t;
13 | struct tcpconn;	/* opaque here: only pointers to it appear in this header */
14 | typedef struct tcpconn tcpconn_t;
15 | 
16 | extern int tcp_dial(struct netaddr laddr, struct netaddr raddr,
17 | 		    tcpconn_t **c_out); /* NOTE(review): @c_out presumably receives the new connection on success; confirm */
18 | extern int tcp_dial_nonblocking(struct netaddr laddr, struct netaddr raddr,
19 | 	                            tcpconn_t **c_out);
20 | extern int tcp_dial_affinity(uint32_t affinity, struct netaddr raddr,
21 | 		    tcpconn_t **c_out);
22 | extern int tcp_dial_conn_affinity(tcpconn_t *in, struct netaddr raddr,
23 | 		    tcpconn_t **c_out);
24 | 
25 | extern void tcp_set_nonblocking(tcpconn_t *c, bool nonblocking);
26 | 
27 | extern int tcp_listen(struct netaddr laddr, int backlog, tcpqueue_t **q_out);
28 | extern int tcp_accept(tcpqueue_t *q, tcpconn_t **c_out);
29 | extern void tcp_qshutdown(tcpqueue_t *q);
30 | extern void tcp_qclose(tcpqueue_t *q);
31 | extern void tcpq_set_nonblocking(tcpqueue_t *q, bool nonblocking);
32 | extern struct netaddr tcpq_local_addr(tcpqueue_t *q);
33 | extern int tcpq_backlog(tcpqueue_t *q);
34 | extern struct netaddr tcp_local_addr(tcpconn_t *c);
35 | extern struct netaddr tcp_remote_addr(tcpconn_t *c);
36 | extern int tcp_get_status(tcpconn_t *c);
37 | extern ssize_t tcp_read(tcpconn_t *c, void *buf, size_t len); /* buffer arguments follow read(2) */
38 | extern ssize_t tcp_write(tcpconn_t *c, const void *buf, size_t len); /* buffer arguments follow write(2) */
39 | extern ssize_t tcp_readv(tcpconn_t *c, const struct iovec *iov, int iovcnt); /* iovec arguments follow readv(2) */
40 | extern ssize_t tcp_writev(tcpconn_t *c, const struct iovec *iov, int iovcnt); /* iovec arguments follow writev(2) */
41 | extern int tcp_shutdown(tcpconn_t *c, int how); /* NOTE(review): @how presumably takes the SHUT_* values from <sys/socket.h>; confirm */
42 | extern void tcp_abort(tcpconn_t *c);
43 | extern void tcp_close(tcpconn_t *c);
44 | 


--------------------------------------------------------------------------------
/inc/runtime/thread.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * thread.h - support for user-level threads
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <base/thread.h>
 8 | #include <base/types.h>
 9 | #include <base/compiler.h>
10 | #include <runtime/preempt.h>
11 | #include <iokernel/control.h>
12 | 
13 | struct thread;
14 | typedef void (*thread_fn_t)(void *arg);
15 | typedef struct thread thread_t;
16 | 
17 | 
18 | /*
19 |  * Low-level routines, these are helpful for bindings and synchronization
20 |  * primitives.
21 |  */
22 | 
23 | extern void thread_park_and_unlock_np(spinlock_t *l);
24 | extern void thread_park_and_preempt_enable(void);
25 | extern void thread_ready(thread_t *thread);
26 | extern void thread_ready_head(thread_t *thread);
27 | extern thread_t *thread_create(thread_fn_t fn, void *arg);
28 | extern thread_t *thread_create_with_buf(thread_fn_t fn, void **buf, size_t len);
29 | 
30 | DECLARE_PERTHREAD(thread_t *, __self);
31 | DECLARE_PERTHREAD(unsigned int, kthread_idx);
32 | 
33 | static inline unsigned int get_current_affinity(void)
34 | {
35 | 	return perthread_read(kthread_idx);
36 | }
37 | 
38 | /**
39 |  * thread_self - gets the currently running thread
40 |  */
41 | inline thread_t *thread_self(void)
42 | {
43 | 	return perthread_read_stable(__self);
44 | }
45 | 
46 | 
47 | extern uint64_t get_uthread_specific(void);
48 | extern void set_uthread_specific(uint64_t val);
49 | 
50 | 
51 | /*
52 |  * High-level routines, use this API most of the time.
53 |  */
54 | 
55 | extern void thread_yield(void);
56 | extern int thread_spawn(thread_fn_t fn, void *arg);
57 | extern void thread_exit(void) __noreturn;
58 | 


--------------------------------------------------------------------------------
/inc/runtime/timer.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * timer.h - support for timers
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <asm/atomic.h>
 8 | #include <asm/ops.h>
 9 | #include <base/stddef.h>
10 | 
/* signature of a timer handler; @arg is the value given to timer_init() */
typedef void (*timer_fn_t)(unsigned long arg);

struct kthread;

/*
 * struct timer_entry - the state of one timer
 *
 * The flags armed and executing are read with load_acquire() by the inline
 * helpers in this header; the remaining fields are only written here by
 * timer_init().
 */
struct timer_entry {
	bool		armed;		/* checked by timer_cancel()/timer_busy() */
	bool		executing;	/* helpers spin until this reads false */
	bool		cancelling;	/* cleared by timer_init() */
	unsigned int	idx;		/* not set by timer_init() */
	timer_fn_t	fn;		/* the timer handler */
	unsigned long	arg;		/* argument passed to the handler */
	struct kthread *localk;		/* set to NULL by timer_init() */
};


/*
 * Low-level API
 */

/**
 * timer_init - initializes a timer
 * @e: the timer entry to initialize
 * @fn: the timer handler (called when the timer fires)
 * @arg: an argument passed to the timer handler
 *
 * Every state flag (armed, executing, cancelling) starts out false so that a
 * timer_entry living in uninitialized memory never carries a stale flag.
 */
static inline void
timer_init(struct timer_entry *e, timer_fn_t fn, unsigned long arg)
{
	e->armed = false;
	e->executing = false;
	e->cancelling = false;
	e->fn = fn;
	e->arg = arg;
	e->localk = NULL;
}
45 | 
46 | /**
47 |  * timer_finish - de-initializes a timer that has already expired
48 |  * @e: the timer entry
49 |  *
50 |  * Ensures that it is safe to reclaim the memory for timer_entry.
51 |  * This function may spin temporarily if racing with the timer firing code.
52 |  * Should not be called on a timer that hasn't expired yet - use timer_cancel
53 |  * instead.
54 |  */
55 | static inline void timer_finish(struct timer_entry *e)
56 | {
57 | 	assert(!e->armed);
58 | 
59 | 	if (unlikely(load_acquire(&e->executing)))
60 | 		while (load_acquire(&e->executing))
61 | 			cpu_relax();
62 | }
63 | 
64 | static inline bool timer_busy(const struct timer_entry *e)
65 | {
66 | 	return load_acquire(&e->armed) || load_acquire(&e->executing);
67 | }
68 | 
69 | extern void timer_start(struct timer_entry *e, uint64_t deadline_us);
70 | extern bool __timer_cancel(struct timer_entry *e);
71 | static inline bool timer_cancel(struct timer_entry *e)
72 | {
73 | 	if (!load_acquire(&e->armed)) {
74 | 		if (unlikely(load_acquire(&e->executing))) {
75 | 			while (load_acquire(&e->executing))
76 | 				cpu_relax();
77 | 		}
78 | 		return false;
79 | 	}
80 | 
81 | 	return __timer_cancel(e);
82 | }
83 | 
84 | extern void timer_restart(struct timer_entry *e, uint64_t deadline_us);
85 | extern bool timer_cancel_recurring(struct timer_entry *e);
86 | 
87 | 
88 | 
89 | /*
90 |  * High-level API
91 |  */
92 | 
93 | extern void timer_sleep_until(uint64_t deadline_us);
94 | extern void timer_sleep(uint64_t duration_us);
95 | 


--------------------------------------------------------------------------------
/iokernel/hw_timestamp.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * hw_timestamp.h - methods for tracking hardware timestamps in MLX5
 3 |  */
 4 | 
 5 | #ifdef MLX5
 6 | 
 7 | #include <infiniband/mlx5dv.h>
 8 | #include <util/mmio.h>
 9 | 
10 | #include "defs.h"
11 | 
12 | #define DEVICE_NAME_MAX 64
13 | extern char device_name[DEVICE_NAME_MAX];
14 | 
15 | extern double device_us_per_cycle;
16 | extern uint32_t curr_hw_time;
17 | extern void *hca_core_clock;
18 | 
19 | static inline bool is_hw_timestamp_enabled()
20 | {
21 | 	return !cfg.no_hw_qdel;
22 | }
23 | 
24 | static inline void hw_timestamp_update(void)
25 | {
26 | 	if (cfg.no_hw_qdel)
27 | 		return;
28 | 
29 | 	/* read the low 32 bits of the hardware counter */
30 | 	curr_hw_time = be32toh(mmio_read32_be(hca_core_clock + 4));
31 | }
32 | 
33 | static inline uint64_t hw_timestamp_delay_us(struct mlx5_cqe64 *cqe)
34 | {
35 | 	double us;
36 | 	uint32_t hwstamp = (uint32_t)be64toh(ACCESS_ONCE(cqe->timestamp));
37 | 
38 | 	if (wraps_lte(hwstamp, curr_hw_time)) {
39 | 		us = (double)(curr_hw_time - hwstamp) * device_us_per_cycle;
40 | 		return us;
41 | 	}
42 | 	return 0;
43 | }
44 | 
45 | #else
46 | 
/* only pointers to these types are used by the stubs below */
struct mlx5_cqe64;
struct eth_addr;

/* stub: hardware timestamps are never enabled in this configuration */
static inline bool is_hw_timestamp_enabled(void)
{
	return false;
}

/* stub: nothing to refresh */
static inline void hw_timestamp_update(void)
{
}

/* stub: always reports a delay of zero; @cqe is not dereferenced */
static inline uint64_t hw_timestamp_delay_us(struct mlx5_cqe64 *cqe)
{
	return 0;
}

/* stub: always reports success; @mac is not dereferenced */
static inline int nl_register_mac_address(struct eth_addr *mac)
{
	return 0;
}

/* stub: always reports success; @mac is not dereferenced */
static inline int nl_remove_mac_address(struct eth_addr *mac)
{
	return 0;
}
68 | 
69 | 
70 | #endif
71 | 


--------------------------------------------------------------------------------
/iokernel/ias_ts.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ias_ts.c - the time sharing controller
 3 |  */
 4 | 
 5 | #include <base/stddef.h>
 6 | #include <base/log.h>
 7 | 
 8 | #include "defs.h"
 9 | #include "sched.h"
10 | #include "ksched.h"
11 | #include "ias.h"
12 | 
13 | /* statistics */
14 | uint64_t ias_ts_yield_count;
15 | 
16 | /**
17 |  * ias_ts_poll - runs the time sharing controller
18 |  */
19 | void ias_ts_poll(void)
20 | {
21 | 	struct thread *th;
22 | 	struct ias_data *sd;
23 | 	struct thread_metrics *m;
24 | 	unsigned int core, tmp;
25 | 
26 | 	sched_for_each_allowed_core(core, tmp) {
27 | 		sd = cores[core];
28 | 		if (!sd || sd->quantum_us == 0)
29 | 			continue;
30 | 		th = sched_get_thread_on_core(core);
31 | 		if (!th)
32 | 			continue;
33 | 
34 | 		m = &th->metrics;
35 | 		if (!m->work_pending || m->uthread_elapsed_us < sd->quantum_us)
36 | 			continue;
37 | 
38 | 		ias_ts_yield_count++;
39 | 		sched_yield_on_core(core);
40 | 	}
41 | }
42 | 
43 | void ias_core_ts_poll(void)
44 | {
45 | 	struct ias_data *sd, *sd_next;
46 | 	int ret, cnt;
47 | 
48 | 	/* if there are congested LCs, there are no BEs running. */
49 | 	if (congested_lc_procs_nr > 0)
50 | 		return;
51 | 
52 | 	cnt = 0;
53 | 
54 | 	/* Check BEs with 0 cores running */
55 | 	list_for_each_safe(&congested_procs[1], sd, sd_next, congested_link) {
56 | 		if (sd->threads_active > 0 || sd->threads_limit == 0)
57 | 			continue;
58 | 
59 | 		ret = ias_add_kthread(sd);
60 | 		if (ret)
61 | 			break;
62 | 
63 | 		if (++cnt == sched_cores_nr)
64 | 			return;
65 | 	}
66 | 
67 | }


--------------------------------------------------------------------------------
/iokernel/ksched.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ksched.c - an interface to the ksched kernel module
 3 |  */
 4 | 
 5 | #include <string.h>
 6 | #include <sys/types.h>
 7 | #include <sys/stat.h>
 8 | #include <sys/mman.h>
 9 | #include <fcntl.h>
10 | #include <unistd.h>
11 | 
12 | #include <base/log.h>
13 | 
14 | #include "ksched.h"
15 | 
16 | /* a file descriptor handle to the ksched kernel module */
17 | int ksched_fd;
18 | /* the number of pending interrupts */
19 | int ksched_count;
20 | /* the number of pending pmc-sampling interrupts */
21 | int ksched_pmc_count;
22 | /* whether UINTR is enabled */
23 | bool ksched_has_uintr;
24 | /* most recent core with an enqueued interrupt */
25 | int last_intr_core;
26 | /* the shared memory region with the kernel module */
27 | struct ksched_shm_cpu *ksched_shm;
28 | /* the set of pending cores to send interrupts to */
29 | cpu_set_t ksched_set;
30 | /* the generation number for each core */
31 | unsigned int ksched_gens[NCPU];
32 | 
33 | /**
34 |  * ksched_uintr_init - initializes UINTR using ksched kernel
35 |  *
36 |  * Must be called on the dataplane core.
37 |  *
38 |  * Returns 0 if successful.
39 |  */
40 | void ksched_uintr_init(void)
41 | {
42 | 	int ret;
43 | 	ret = ioctl(ksched_fd, KSCHED_IOC_UINTR_SETUP_ADMIN, 0);
44 | 	ksched_has_uintr = (ret == 0);
45 | 	log_info("UINTR: %s", ksched_has_uintr ? "enabled" : "disabled");
46 | }
47 | 
48 | /**
49 |  * ksched_init - initializes the ksched kernel module interface
50 |  *
51 |  * Returns 0 if successful.
52 |  */
53 | int ksched_init(void)
54 | {
55 | 	char *ksched_addr;
56 | 	int i;
57 | 
58 | 	/* first open the file descriptor */
59 | 	ksched_fd = open("/dev/ksched", O_RDWR);
60 | 	if (ksched_fd < 0) {
61 | 		log_err("Could not find ksched kernel module (%s). Please ensure that "
62 | 			    "ksched is compiled and inserted (see README for more details)",
63 | 			    strerror(errno));
64 | 		return -errno;
65 | 	}
66 | 
67 | 	if (ioctl(ksched_fd, KSCHED_IOC_GET_SCHED_API_VER) != KSCHED_SCHED_API_VER) {
68 | 		log_err("ksched module API mismatch");
69 | 		return -1;
70 | 	}
71 | 
72 | 	/* then map the shared memory region with the kernel */
73 | 	ksched_addr = mmap(NULL, sizeof(struct ksched_shm_cpu) * NCPU,
74 | 		    PROT_READ | PROT_WRITE, MAP_SHARED, ksched_fd, 0);
75 | 	if (ksched_addr == MAP_FAILED)
76 | 		return -errno;
77 | 
78 | 	/* then initialize the generation numbers */
79 | 	ksched_shm = (struct ksched_shm_cpu *)ksched_addr;
80 | 	for (i = 0; i < NCPU; i++) {
81 | 		ksched_gens[i] = load_acquire(&ksched_shm[i].last_gen);
82 | 		ksched_idle_hint(i, 0);
83 | 	}
84 | 
85 |         return 0;
86 | }
87 | 


--------------------------------------------------------------------------------
/iokernel/pcm.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | /* Declarations of relevant functions in patched PCM library */
4 | extern uint32_t pcm_caladan_get_cas_count(uint32_t channel);
5 | extern uint32_t pcm_caladan_get_active_channel_count(void);
6 | extern int pcm_caladan_init(int socket);
7 | 
8 | 


--------------------------------------------------------------------------------
/iokernel/pmc.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * pmc.h - useful definitions for Intel Performance Counters
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
#define PMC_ESEL_UMASK_SHIFT    8
#define PMC_ESEL_CMASK_SHIFT    24
/*
 * PMC_ESEL_ENTRY - packs an event-select value
 *
 * Each argument is truncated to 8 bits: @event lands in bits 0-7, @umask in
 * bits 8-15 and @cmask in bits 24-31. The PMC_ESEL_* flag bits below occupy
 * bits 16-23 and are OR'ed in separately.
 */
#define PMC_ESEL_ENTRY(event, umask, cmask)		\
        (((event) & 0xFFUL) |				\
         (((umask) & 0xFFUL) << PMC_ESEL_UMASK_SHIFT) |	\
         (((cmask) & 0xFFUL) << PMC_ESEL_CMASK_SHIFT))
13 | #define PMC_ESEL_USR            (1ULL << 16) /* User Mode */
14 | #define PMC_ESEL_OS             (1ULL << 17) /* Kernel Mode */
15 | #define PMC_ESEL_EDGE           (1ULL << 18) /* Edge detect */
16 | #define PMC_ESEL_PC             (1ULL << 19) /* Pin control */
17 | #define PMC_ESEL_INT            (1ULL << 20) /* APIC interrupt enable */
18 | #define PMC_ESEL_ANY            (1ULL << 21) /* Any thread */
19 | #define PMC_ESEL_ENABLE         (1ULL << 22) /* Enable counters */
20 | #define PMC_ESEL_INV            (1ULL << 23) /* Invert counter mask */
21 | 
22 | /* architectural performance counters (works on all Intel CPUs) */
23 | #define PMC_ARCH_CORE_CYCLES    PMC_ESEL_ENTRY(0x3C, 0x00, 0)
24 | #define PMC_ARCH_INSTR_RETIRED  PMC_ESEL_ENTRY(0xC0, 0x00, 0)
25 | #define PMC_ARCH_REF_CYCLES     PMC_ESEL_ENTRY(0x3C, 0x01, 0)
26 | #define PMC_ARCH_LLC_REF        PMC_ESEL_ENTRY(0x2E, 0x4F, 0)
27 | #define PMC_ARCH_LLC_MISSES     PMC_ESEL_ENTRY(0x2E, 0x41, 0)
28 | #define PMC_ARCH_BRANCHES       PMC_ESEL_ENTRY(0xC4, 0x00, 0)
29 | #define PMC_ARCH_BRANCH_MISSES  PMC_ESEL_ENTRY(0xC5, 0x00, 0)
30 | 
31 | /* this performance counter measures LLC misses as a proxy for mem bandwidth */
32 | #define PMC_LLC_MISSES (PMC_ARCH_LLC_MISSES | PMC_ESEL_USR | PMC_ESEL_OS | \
33 | 			PMC_ESEL_ENABLE)
34 | #define PMC_LLC_MISSES_ANY (PMC_ARCH_LLC_MISSES | PMC_ESEL_USR | PMC_ESEL_OS | \
35 | 			    PMC_ESEL_ANY | PMC_ESEL_ENABLE)
36 | 


--------------------------------------------------------------------------------
/iokernel/ref.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ref.h - generic support for reference counts
 3 |  *
 4 |  * This implementation is inspired by the following paper:
 5 |  * Kroah-Hartman, Greg, kobjects and krefs. Linux Symposium 2004
 6 |  *
 7 |  * This version doesn't use atomics.
 8 |  */
 9 | 
10 | #pragma once
11 | 
12 | #include <base/stddef.h>
13 | 
/* a plain (non-atomic) reference counter; see the note at the top of the file */
struct ref {
	int cnt;
};

/**
 * ref_init - initializes the reference count to one
 * @ref: the kref
 */
static inline void
ref_init(struct ref *ref)
{
	ref->cnt = 1;
}

/**
 * ref_get - increments the reference count (not atomic)
 * @ref: the kref
 *
 * The count must already be positive; taking a reference on an object whose
 * count has dropped to zero is a bug and trips the assertion.
 */
static inline void
ref_get(struct ref *ref)
{
	assert(ref->cnt > 0);
	ref->cnt++;
}

/**
 * ref_put - decrements the reference count (not atomic), releasing the
 *	     object when it reaches zero
 * @ref: the ref
 * @release: a pointer to the release function
 *
 * As in ref_get(), the count must be positive on entry so that an unbalanced
 * put is caught instead of silently driving the count negative.
 */
static inline void
ref_put(struct ref *ref, void (*release)(struct ref *ref))
{
	assert(release);
	assert(ref->cnt > 0);
	if (--ref->cnt == 0)
		release(ref);
}
52 | 


--------------------------------------------------------------------------------
/iokernel/stat.c:
--------------------------------------------------------------------------------
 1 | #include <pthread.h>
 2 | #include <stdio.h>
 3 | 
 4 | #include <base/log.h>
 5 | 
 6 | #include "sched.h"
 7 | #include "defs.h"
 8 | 
 9 | #define BUFSIZE 4096
10 | 
11 | uint64_t stats[NR_STATS];
12 | 
/*
 * Printable name for each slot of stats[]; print_stats() pairs stat_names[i]
 * with stats[i], so the order here has to follow the stat indices. The
 * BUILD_ASSERT below only checks that the number of names equals NR_STATS.
 */
static const char *stat_names[] = {
	"RX_UNREGISTERED_MAC",
	"RX_UNICAST_FAIL",
	"RX_BROADCAST_FAIL",
	"RX_FLOW_TAG_MATCH",
	"RX_UNHANDLED",
	"RX_HASH_MISSING",
	"PARKED_THREAD_BUSY_WAKE",
	"PARK_FAST_REWAKE",
	"TX_COMPLETION_OVERFLOW",
	"TX_COMPLETION_FAIL",
	"RX_PULLED",
	"COMMANDS_PULLED",
	"COMPLETION_DRAINED",
	"COMPLETION_ENQUEUED",
	"LOOPS",
	"TX_PULLED",
	"TX_BACKPRESSURE",
	"SCHED_RUN",
	"PREEMPT",
	"RX_REFILL",
	"DIRECTPATH_EVENTS",
	"DMA_ENQUEUE",
	"DMA_DEQUEUE",
	"DMA_SUBMIT",
};

BUILD_ASSERT(ARRAY_SIZE(stat_names) == NR_STATS);
41 | 
42 | static void print_stats(void)
43 | {
44 | 	int i;
45 | 	uint64_t now, cur_stats[NR_STATS];
46 | 	static uint64_t last_stats[NR_STATS];
47 | 
48 | 	barrier();
49 | 	now = rdtsc();
50 | 	for (i = 0; i < NR_STATS; i++)
51 | 		cur_stats[i] = ACCESS_ONCE(stats[i]);
52 | 	barrier();
53 | 
54 | 	printf("-----------------\n");
55 | 
56 | 	for (i = 0; i < NR_STATS; i++) {
57 | 		printf("%lu %s %lu\n", now, stat_names[i],
58 | 			   cur_stats[i] - last_stats[i]);
59 | 		last_stats[i] = cur_stats[i];
60 | 	}
61 | 
62 | 	fflush(stdout);
63 | }
64 | 
65 | static void *print_stats_thread(void *arg)
66 | {
67 | 	cpu_set_t cpuset;
68 | 	int ret;
69 | 
70 | 	CPU_ZERO(&cpuset);
71 | 	CPU_SET(sched_ctrl_core, &cpuset);
72 | 
73 | 	ret = sched_setaffinity(thread_gettid(), sizeof(cpu_set_t), &cpuset);
74 | 	if (ret < 0) {
75 | 		log_warn("log: failed to pin to contorl core with err %d", errno);
76 | 		return NULL;
77 | 	}
78 | 
79 | 	while (true) {
80 | 		print_stats();
81 | 		sleep(1);
82 | 	}
83 | }
84 | 
85 | int stats_init(void)
86 | {
87 | 	pthread_t tid;
88 | 
89 | 	return pthread_create(&tid, NULL, print_stats_thread, NULL);
90 | }
91 | 
92 | 


--------------------------------------------------------------------------------
/ksched/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | 


--------------------------------------------------------------------------------
/ksched/Kbuild:
--------------------------------------------------------------------------------
1 | obj-m += ksched.o fake_idle.o
2 | 
3 | ksched-objs := ksched_main.o uintr.o
4 | 


--------------------------------------------------------------------------------
/ksched/Makefile:
--------------------------------------------------------------------------------
 1 | KDIR ?= /lib/modules/$(shell uname -r)/build
 2 | BUILD_DIR ?= $(PWD)/build
 3 | BUILD_DIR_MAKEFILE ?= $(PWD)/build/Makefile
 4 | 
 5 | default: $(BUILD_DIR_MAKEFILE)
 6 | 	make -C $(KDIR) M=$(BUILD_DIR) src=$(PWD) modules
 7 | 
 8 | $(BUILD_DIR):
 9 | 	mkdir -p "$@"
10 | 
11 | $(BUILD_DIR_MAKEFILE): $(BUILD_DIR)
12 | 	touch "$@"
13 | 
14 | clean:
15 | 	make -C $(KDIR) M=$(BUILD_DIR) src=$(PWD) clean
16 | 


--------------------------------------------------------------------------------
/ksched/defs.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <asm/local.h>
 4 | 
 5 | #include "ksched.h"
 6 | #include "uintr_hw.h"
 7 | 
/*
 * struct ksched_percpu - ksched state kept for each CPU (see the per-CPU
 * variable kp declared below).
 *
 * NOTE(review): struct uintr_percpu is embedded by value, and it is defined
 * in uintr.h rather than in uintr_hw.h, which is what this header includes.
 * Confirm that every includer pulls in uintr.h first.
 */
struct ksched_percpu {
	unsigned int		last_gen;
	local_t			busy;
	u64			last_sel;
	struct task_struct	*running_task;

	struct uintr_percpu	uintr;
};
16 | 
17 | extern __read_mostly struct ksched_shm_cpu *shm;
18 | DECLARE_PER_CPU(struct ksched_percpu, kp);


--------------------------------------------------------------------------------
/ksched/fake_idle.c:
--------------------------------------------------------------------------------
 1 | #include <linux/cpu.h>
 2 | #include <linux/cpuidle.h>
 3 | #include <linux/kobject.h>
 4 | #include <linux/module.h>
 5 | #include <linux/moduleparam.h>
 6 | #include <linux/sched.h>
 7 | 
 8 | static struct kobject *kobj;
 9 | static int unloaded;
10 | static int refcnt_for_unload;
11 | 
12 | static int __cpuidle fake_idle(struct cpuidle_device *dev,
13 |                                  struct cpuidle_driver *drv, int index)
14 | {
15 |   return index;
16 | }
17 | 
/*
 * A cpuidle driver with exactly one state. The state's enter callback is
 * fake_idle() and it is also the safe state (index 0).
 */
static struct cpuidle_driver fake_idle_driver = {
        .name = "fake_idle",
        .owner = THIS_MODULE,
        .states = {
                {
                        .enter                  = fake_idle,
                        .exit_latency           = 1,
                        .target_residency       = 1,
                        .name                   = "",
                        .desc                   = "",
		},
        },
        .safe_state_index = 0,
        .state_count = 1,
};
33 | 
34 | static ssize_t unload_store(struct kobject *kobj, struct kobj_attribute *attr,
35 |                                    const char *buf, size_t count) {
36 |   if (unloaded) return -ENODEV;
37 |   if (atomic_read(&THIS_MODULE->refcnt) + 1 != refcnt_for_unload) return -EBUSY;
38 |   unloaded = 1;
39 |   cpuidle_unregister(&fake_idle_driver);
40 |   return count;
41 | }
42 | 
43 | static ssize_t unload_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf){
44 |    return sprintf(buf, "%s\n", unloaded ? "unloaded" : "loaded");
45 | }
46 | 
47 | static struct kobj_attribute unload_attr = __ATTR(unload, 0644, unload_show, unload_store);
48 | 
49 | static int __init create_sysfs_entry(void)
50 | {
51 |   int err;
52 | 
53 |   kobj = kobject_create_and_add("fake_idle", NULL);
54 |   if (kobj == NULL)
55 |     return -ENOMEM;
56 |   err = sysfs_create_file(kobj, &unload_attr.attr);
57 |   if (err)
58 |     kobject_put(kobj);
59 | 
60 |   return err;
61 | }
62 | 
63 | static int __init fake_idle_init(void)
64 | {
65 |   int err;
66 | 
67 | 
68 |   err = create_sysfs_entry();
69 |   if (err)
70 |     return err;
71 | 
72 |   err = cpuidle_register(&fake_idle_driver, NULL);
73 |   if (err)
74 |     kobject_put(kobj);
75 | 
76 |   refcnt_for_unload = atomic_read(&THIS_MODULE->refcnt);
77 |   return err;
78 | }
79 | 
80 | static void __exit fake_idle_exit(void)
81 | {
82 |   kobject_put(kobj);
83 | }
84 | 
85 | module_init(fake_idle_init);
86 | module_exit(fake_idle_exit);
87 | 
88 | MODULE_LICENSE("GPL");
89 | 


--------------------------------------------------------------------------------
/ksched/ksched.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ksched.h - the UAPI for ksched and its ioctl's
 3 |  */
 4 | 
 5 | #pragma once
 6 | 
 7 | #include <linux/types.h>
 8 | #include <linux/ioctl.h>
 9 | 
10 | /*
11 |  * NOTE: normally character devices are dynamically allocated, but for
12 |  * convenience we use 280.
13 |  */
14 | #define KSCHED_MAJOR		280
15 | #define KSCHED_MINOR		0
16 | 
17 | /*
18 |  * Bump these version numbers when the API for the scheduler or programs is
19 |  * modified.
20 |  */
21 | #define KSCHED_SCHED_API_VER 	2
22 | #define KSCHED_USER_API_VER 	2
23 | 
/*
 * struct ksched_intr_req - argument type of the KSCHED_IOC_INTR and
 * KSCHED_IOC_UINTR_MULTICAST ioctls defined below.
 *
 * @len: presumably the size of @mask in bytes -- TODO confirm
 * @mask: a userspace pointer to the mask
 */
struct ksched_intr_req {
	size_t			len;
	const void __user	*mask;
};
28 | 
29 | struct uintr_upid {
30 | 	union {
31 | 		struct {
32 | 			__u8 status;	/* bit 0: ON, bit 1: SN, bit 2-7: reserved */
33 | 			__u8 reserved1;	/* Reserved */
34 | 			__u8 nv;		/* Notification vector */
35 | 			__u8 reserved2;	/* Reserved */
36 | 			__u32 ndst;	/* Notification destination */
37 | 		} nc __packed;		/* Notification control */
38 | 		long unsigned int word_val;
39 | 	};
40 | 	__u64 puir;		/* Posted user interrupt requests */
41 | } __aligned(64);
42 | 
/*
 * struct ksched_shm_cpu - one entry of the memory region shared between
 * userspace and the kernel module. The iokernel maps NCPU of these from
 * /dev/ksched and indexes the array by CPU number.
 */
struct ksched_shm_cpu {
	/* written by userspace */
	unsigned int		gen;
	pid_t			tid;
	unsigned int		mwait_hint;
	unsigned int		sig;
	unsigned int		signum;
	unsigned int		pmc;
	__u64			pmcsel;

	/* written by kernelspace */
	unsigned int		busy;
	unsigned int		last_gen;
	__u64			pmcval;
	__u64			pmctsc;

	/* extra space for future features (and cache alignment) */
	unsigned long		rsv[1];

	/* declared __aligned(64), so it starts on its own 64-byte boundary */
	struct uintr_upid 	upid;
};
64 | 
65 | #define KSCHED_MAGIC		0xF0
66 | #define KSCHED_IOC_MAXNR	9
67 | 
68 | #define KSCHED_IOC_START	_IO(KSCHED_MAGIC, 1)
69 | #define KSCHED_IOC_PARK		_IO(KSCHED_MAGIC, 2)
70 | #define KSCHED_IOC_INTR		_IOW(KSCHED_MAGIC, 3, struct ksched_intr_req)
71 | #define KSCHED_IOC_UINTR_MULTICAST _IOW(KSCHED_MAGIC, 4, struct ksched_intr_req)
72 | #define KSCHED_IOC_UINTR_SETUP_USER		_IO(KSCHED_MAGIC, 5)
73 | #define KSCHED_IOC_UINTR_SETUP_ADMIN		_IO(KSCHED_MAGIC, 6)
74 | #define KSCHED_IOC_GETTID        _IO(KSCHED_MAGIC, 7)
75 | #define KSCHED_IOC_GET_USER_API_VER        _IO(KSCHED_MAGIC, 8)
76 | #define KSCHED_IOC_GET_SCHED_API_VER        _IO(KSCHED_MAGIC, 9)
77 | 


--------------------------------------------------------------------------------
/ksched/uintr.h:
--------------------------------------------------------------------------------
 1 | // uintr.h
 2 | 
 3 | #pragma once
 4 | 
 5 | #include <asm/apic.h>
 6 | #include <asm/local.h>
 7 | #include <asm/irq_vectors.h>
 8 | 
 9 | #include "uintr_hw.h"
10 | 
11 | /* Use KVM's posted interrupt vector */
12 | #define UIPI_APIC_VECTOR			POSTED_INTR_WAKEUP_VECTOR
13 | 
14 | struct uintr_ctx {
15 | 	unsigned long handler;
16 | 	struct kref refcount;
17 | 	bool is_admin;
18 | 
19 | 	/* sender UITT table */
20 | 	struct uintr_uitt_entry uitt[];
21 | };
22 | 
23 | struct uintr_xstate {
24 | 	struct xregs_state xregs;
25 | 	struct uintr_state uintr;
26 | } __packed __aligned(64);
27 | 
/*
 * struct uintr_percpu - UINTR bookkeeping embedded in each CPU's
 * struct ksched_percpu.
 *
 * A non-NULL assigned_ctx is what uintr_active() reports as "active".
 */
struct uintr_percpu {
	struct task_struct	*assigned_task;
	struct uintr_ctx	*assigned_ctx;

	bool		state_loaded;
	bool		is_admin_ctx;
	struct uintr_xstate cur_xstate;
};
36 | 
37 | extern void uintr_cleanup_core(struct uintr_percpu *p, int cpu);
38 | extern void uintr_assign_core(struct uintr_ctx *ctx, u64 stack);
39 | extern long uintr_multicast(struct ksched_intr_req __user *ureq);
40 | 
41 | extern void uintr_deliver_ipi(struct uintr_percpu *p);
42 | 
43 | extern int uintr_init(void);
44 | extern void uintr_exit(void);
45 | extern long uintr_setup_admin(struct file *filp);
46 | extern long uintr_setup_user(struct file *filp, unsigned long handler);
47 | extern void uintr_file_release(struct file *filp);
48 | 
49 | extern bool uintr_enabled;
50 | 
51 | static inline struct uintr_ctx *to_uintr_ctx(struct file *filp)
52 | {
53 | 	return (struct uintr_ctx *)filp->private_data;
54 | }
55 | 
56 | static inline bool uintr_active(struct uintr_percpu *p)
57 | {
58 | 	return p->assigned_ctx != NULL;
59 | }
60 | 
61 | static inline void uintr_signal_self(void)
62 | {
63 | 	apic->send_IPI_self(UIPI_APIC_VECTOR);
64 | }


--------------------------------------------------------------------------------
/ksched/uintr_hw.h:
--------------------------------------------------------------------------------
 1 | 
 2 | // UINTR hardware definitions
 3 | 
 4 | #pragma once
 5 | 
 6 | #define X86_FEATURE_UINTR		(18*32+ 5) /* User Interrupts support */
 7 | #define DISABLE_UINTR		(1 << (X86_FEATURE_UINTR & 31))
 8 | 
 9 | /* User Interrupt interface */
10 | #define MSR_IA32_UINTR_RR		0x985
11 | #define MSR_IA32_UINTR_HANDLER		0x986
12 | #define MSR_IA32_UINTR_STACKADJUST	0x987
13 | #define MSR_IA32_UINTR_MISC		0x988	/* 39:32-UINV, 31:0-UITTSZ */
14 | #define MSR_IA32_UINTR_PD		0x989
15 | #define MSR_IA32_UINTR_TT		0x98a
16 | 
17 | #define X86_CR4_UINTR_BIT	25 /* enable User Interrupts support */
18 | #define X86_CR4_UINTR		_BITUL(X86_CR4_UINTR_BIT)
19 | 
20 | #define UINTR_UPID_STATUS_ON		0x0	/* Outstanding notification */
21 | #define UINTR_UPID_STATUS_SN		0x1	/* Suppressed notification */
22 | 
23 | #define UINTR_UITT_VALID_BIT        0x0
24 | 
25 | /*
26 |  * State component 14 is supervisor state used for User Interrupts state.
27 |  * The size of this state is 48 bytes
28 |  */
29 | struct uintr_state {
30 | 	__u64 handler;
31 | 	__u64 stack_adjust;
32 | 	struct {
33 | 		__u32	uitt_size;
34 | 		__u8	uinv;
35 | 		__u8	pad1;
36 | 		__u8	pad2;
37 | 		__u8	pad3:7;
38 | 		__u8	uif:1;
39 | 	} __packed misc;
40 | 	__u64 upid_addr;
41 | 	__u64 uirr;
42 | 	__u64 uitt_addr;
43 | } __packed;
44 | 
45 | /* User Interrupt Target Table Entry (UITTE) */
46 | struct uintr_uitt_entry {
47 | 	__u8	valid;			/* bit 0: valid, bit 1-7: reserved */
48 | 	__u8	user_vec;
49 | 	__u8	reserved[6];
50 | 	__u64	target_upid_addr;
51 | } __packed __aligned(16);
52 | 
53 | #define XFEATURE_UINTR 14
54 | #define XFEATURE_MASK_UINTR		(1 << XFEATURE_UINTR)
55 | 


--------------------------------------------------------------------------------
/runtime/net/directpath/defs.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | #include <base/pci.h>
 5 | #include <base/tcache.h>
 6 | #include <base/thread.h>
 7 | #include <iokernel/directpath.h>
 8 | #include <iokernel/queue.h>
 9 | 
10 | #include "../defs.h"
11 | 
12 | 
13 | #define RQ_NUM_DESC			1024
14 | #define SQ_NUM_DESC			128
15 | #define SQ_CLEAN_THRESH			RUNTIME_RX_BATCH_SIZE
16 | #define SQ_CLEAN_MAX			SQ_CLEAN_THRESH
17 | 
18 | /* space for the mbuf struct */
19 | #define RX_BUF_HEAD \
20 |  (align_up(sizeof(struct mbuf), 2 * CACHE_LINE_SIZE))
21 | /* some NICs expect enough padding for CRC etc., even if they strip it */
22 | #define RX_BUF_TAIL			64
23 | 
24 | static inline size_t directpath_get_buf_size(void)
25 | {
26 | 	if (cfg_directpath_strided)
27 | 		return DIRECTPATH_STRIDE_MODE_BUF_SZ;
28 | 
29 | 	return align_up(net_get_mtu() + RX_BUF_HEAD + RX_BUF_TAIL,
30 | 			2 * CACHE_LINE_SIZE);
31 | }
32 | 
33 | extern struct pci_addr nic_pci_addr;
34 | extern struct mempool directpath_buf_mp;
35 | extern struct tcache *directpath_buf_tcache;
36 | extern DEFINE_PERTHREAD(struct tcache_perthread, directpath_buf_pt);
37 | extern void directpath_rx_completion(struct mbuf *m);
38 | 
39 | extern int mlx5_init(void);
40 | extern int mlx5_init_thread(void);
41 | 


--------------------------------------------------------------------------------
/runtime/net/ping.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * ping.c - simple ping utility
 3 |  */
 4 | 
 5 | #include <sys/time.h>
 6 | 
 7 | #include <base/compiler.h>
 8 | #include <base/log.h>
 9 | #include <net/icmp.h>
10 | #include <net/ping.h>
11 | 
12 | #include "defs.h"
13 | 
14 | static uint16_t ping_id;
15 | 
16 | int net_ping_init(void)
17 | {
18 | 	ping_id = rand();
19 | 	return 0;
20 | }
21 | 
22 | void net_send_ping(uint16_t seq_num, uint32_t daddr)
23 | {
24 | 	struct mbuf *m;
25 | 	struct ping_payload *payload;
26 | 
27 | 	log_debug("ping: sending ping with id %u, seq_num %u to %u", ping_id,
28 | 			seq_num, daddr);
29 | 
30 | 	m = net_tx_alloc_mbuf(ip_headroom() + sizeof(struct icmp_hdr));
31 | 	if (unlikely(!m))
32 | 		return;
33 | 
34 | 	/* add send timestamp to payload */
35 | 	payload = mbuf_push_hdr(m, struct ping_payload);
36 | 	gettimeofday(&payload->tx_time, NULL);
37 | 
38 | 	if (unlikely(net_tx_icmp(m, ICMP_ECHO, 0, daddr, ping_id, seq_num) != 0))
39 | 		mbuf_free(m);
40 | }
41 | 
/*
 * timeval_subtract - computes out -= in for two timeval structs
 * @out: the minuend, overwritten with the difference
 * @in: the subtrahend
 *
 * Borrows one second when the microsecond field would go negative.
 * Assumes out >= in.
 */
static void timeval_subtract(struct timeval *out, const struct timeval *in)
{
	out->tv_usec -= in->tv_usec;
	if (out->tv_usec < 0) {
		out->tv_usec += 1000000;
		out->tv_sec -= 1;
	}

	out->tv_sec -= in->tv_sec;
}
53 | 
54 | void net_recv_ping(const struct ping_payload *payload,
55 | 		const struct icmp_pkt *icmp_pkt)
56 | {
57 | 	struct timeval tmp_time;
58 | 	uint32_t latency_us;
59 | 
60 | 	if (icmp_pkt->icmp_id != ping_id) {
61 | 		/* this ICMP pkt is not for us */
62 | 		return;
63 | 	}
64 | 
65 | 	/* determine latency */
66 | 	gettimeofday(&tmp_time, NULL);
67 | 	timeval_subtract(&tmp_time, &payload->tx_time);
68 | 	latency_us = tmp_time.tv_sec * 1000000 + tmp_time.tv_usec;
69 | 
70 | 	log_debug("ping: received ping with seq_num %u, latency %u us",
71 | 			icmp_pkt->icmp_seq, latency_us);
72 | }
73 | 


--------------------------------------------------------------------------------
/runtime/poll.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * poll.c - support for event polling (similar to select/epoll/poll, etc.)
  3 |  */
  4 | 
  5 | #include <runtime/poll.h>
  6 | 
  7 | /**
  8 |  * poll_init - initializes a polling waiter object
  9 |  * @w: the waiter object to initialize
 10 |  */
 11 | void poll_init(poll_waiter_t *w)
 12 | {
 13 | 	spin_lock_init(&w->lock);
 14 | 	list_head_init(&w->triggered);
 15 | 	w->waiting_th = NULL;
 16 | }
 17 | 
 18 | /**
 19 |  * poll_arm - registers a trigger with a waiter
 20 |  * @w: the waiter to register with
 21 |  * @t: the trigger to register
 22 |  * @data: data to provide when the trigger fires
 23 |  */
 24 | void poll_arm(poll_waiter_t *w, poll_trigger_t *t, unsigned long data)
 25 | {
 26 | 	if (WARN_ON(t->waiter != NULL))
 27 | 		return;
 28 | 
 29 | 	t->waiter = w;
 30 | 	t->triggered = false;
 31 | 	t->data = data;
 32 | }
 33 | 
 34 | /**
 35 |  * poll_disarm - unregisters a trigger with a waiter
 36 |  * @t: the trigger to unregister
 37 |  */
 38 | void poll_disarm(poll_trigger_t *t)
 39 | {
 40 | 	poll_waiter_t *w;
 41 | 	if (WARN_ON(t->waiter == NULL))
 42 | 		return;
 43 | 
 44 | 	w = t->waiter;
 45 | 	spin_lock_np(&w->lock);
 46 | 	if (t->triggered) {
 47 | 		list_del(&t->link);
 48 | 		t->triggered = false;
 49 | 	}
 50 | 	spin_unlock_np(&w->lock);
 51 | 
 52 | 	t->waiter = NULL;
 53 | }
 54 | 
 55 | /**
 56 |  * poll_wait - waits for the next event to trigger
 57 |  * @w: the waiter to wait on
 58 |  *
 59 |  * Returns the data provided to the trigger that fired
 60 |  */
 61 | unsigned long poll_wait(poll_waiter_t *w)
 62 | {
 63 | 	thread_t *th = thread_self();
 64 | 	poll_trigger_t *t;
 65 | 
 66 | 	while (true) {
 67 | 		spin_lock_np(&w->lock);
 68 | 		t = list_pop(&w->triggered, poll_trigger_t, link);
 69 | 		if (t) {
 70 | 			spin_unlock_np(&w->lock);
 71 | 			return t->data;
 72 | 		}
 73 | 		w->waiting_th = th;
 74 | 		thread_park_and_unlock_np(&w->lock);
 75 | 	}
 76 | }
 77 | 
 78 | /**
 79 |  * poll_trigger - fires a trigger
 80 |  * @w: the waiter to wake up (if it is waiting)
 81 |  * @t: the trigger that fired
 82 |  */
 83 | void poll_trigger(poll_waiter_t *w, poll_trigger_t *t)
 84 | {
 85 | 	thread_t *wth = NULL;
 86 | 
 87 | 	spin_lock_np(&w->lock);
 88 | 	if (t->triggered) {
 89 | 		spin_unlock_np(&w->lock);
 90 | 		return;
 91 | 	}
 92 | 	t->triggered = true;
 93 | 	list_add(&w->triggered, &t->link);
 94 | 	if (w->waiting_th) {
 95 | 		wth = w->waiting_th;
 96 | 		w->waiting_th = NULL;
 97 | 	}
 98 | 	spin_unlock_np(&w->lock);
 99 | 
100 | 	if (wth)
101 | 		thread_ready(wth);
102 | }
103 | 


--------------------------------------------------------------------------------
/sample.config:
--------------------------------------------------------------------------------
1 | # an example runtime config file
2 | host_addr 192.168.1.5
3 | host_netmask 255.255.255.0
4 | host_gateway 192.168.1.1
5 | runtime_kthreads 3
6 | runtime_guaranteed_kthreads 0
7 | runtime_priority be
8 | 


--------------------------------------------------------------------------------
/scripts/count_loc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # run from shenango directory
 4 | 
 5 | echo "Runtime"
 6 | cloc runtime/* bindings/* inc/runtime/* --exclude-lang=D
 7 | 
 8 | echo "IOKernel"
 9 | cloc iokernel/* inc/iokernel/* --exclude-lang=D
10 | 
11 | echo "Ksched"
12 | cloc ksched/*
13 | 
14 | echo "Base"
15 | cloc base/* net/* inc/base/* inc/asm/* inc/net/* --exclude-lang=D
16 | 
17 | echo "Spin-server + Loadgen"
18 | cloc apps/synthetic/* --exclude-lang=D
19 | 


--------------------------------------------------------------------------------
/scripts/cstate.c:
--------------------------------------------------------------------------------
 1 | #include <sys/types.h>
 2 | #include <sys/stat.h>
 3 | #include <fcntl.h>
 4 | #include <stdlib.h>
 5 | #include <errno.h>
 6 | #include <stdint.h>
 7 | #include <stdio.h>
 8 | #include <string.h>
 9 | #include <unistd.h>
10 | 
/* fd for /dev/cpu_dma_latency; deliberately kept open once the target is set */
static int pm_qos_fd = -1;

/*
 * set_latency_target - requests a latency target through the PM QoS file
 * @target_us: the latency target, in microseconds
 *
 * Opens /dev/cpu_dma_latency and writes the target to it. The descriptor is
 * left open; calling this again once it is open is a no-op. On failure an
 * error is printed and the process exits with the corresponding errno value.
 */
void set_latency_target(int32_t target_us)
{
	ssize_t ret;
	int err;

	if (pm_qos_fd >= 0)
		return;
	pm_qos_fd = open("/dev/cpu_dma_latency", O_RDWR);
	if (pm_qos_fd < 0) {
		/* save errno first: fprintf() is allowed to overwrite it */
		err = errno;
		fprintf(stderr, "Failed to open PM QOS file: %s\n",
			strerror(err));
		exit(err);
	}

	ret = write(pm_qos_fd, &target_us, sizeof(target_us));
	if (ret != (ssize_t)sizeof(target_us)) {
		/* a short write is treated as an I/O error */
		err = (ret < 0) ? errno : EIO;
		fprintf(stderr, "Fail to set QOS target: %s\n", strerror(err));
		exit(err);
	}
}
32 | 
/*
 * main - sets the latency target given on the command line, then sleeps
 * forever so that the PM QoS file descriptor stays open.
 *
 * argv[1] must start with a decimal integer that fits in an int32_t;
 * anything else is rejected instead of being silently treated as 0.
 */
int main(int argc, char *argv[])
{
	char *end;
	long target;

	if (argc < 2) {
		printf("usage: [maximum c-state latency in microseconds]\n");
		exit(1);
	}

	errno = 0;
	target = strtol(argv[1], &end, 10);
	if (end == argv[1] || errno == ERANGE ||
	    target < INT32_MIN || target > INT32_MAX) {
		fprintf(stderr, "invalid latency target: %s\n", argv[1]);
		exit(1);
	}

	set_latency_target((int32_t)target);
	while (1)
		sleep(10);
	return 0;
}
45 | 


--------------------------------------------------------------------------------
/scripts/setup_machine.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # run with sudo
 3 | 
 4 | # needed for the iokernel's shared memory
 5 | sysctl -w kernel.shm_rmid_forced=1
 6 | sysctl -w kernel.shmmax=18446744073692774399
 7 | sysctl -w vm.hugetlb_shm_group=27
 8 | sysctl -w vm.max_map_count=16777216
 9 | sysctl -w net.core.somaxconn=3072
10 | 
11 | # check to see if we need a fake idle driver
12 | if grep -q none /sys/devices/system/cpu/cpuidle/current_driver; then
13 |   insmod $(dirname $0)/../ksched/build/fake_idle.ko
14 | fi
15 | 
16 | # set up the ksched module
17 | rmmod ksched
18 | rm /dev/ksched
19 | 
20 | if [[ "$1x" = "nouintrx" ]]; then
21 |   insmod $(dirname $0)/../ksched/build/ksched.ko nouintr=1
22 | else
23 |   insmod $(dirname $0)/../ksched/build/ksched.ko
24 | fi
25 | 
26 | mknod /dev/ksched c 280 0
27 | chmod uga+rwx /dev/ksched
28 | 
29 | # reserve huge pages
30 | echo 5192 | sudo tee  /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages > /dev/null
31 | 
32 | echo madvise > /sys/kernel/mm/transparent_hugepage/enabled
33 | 
34 | # load msr module
35 | modprobe msr
36 | 
37 | 


--------------------------------------------------------------------------------
/scripts/spin.cc:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdint.h>
 3 | #include <sys/mman.h>
 4 | #include <sys/types.h>
 5 | #include <sys/syscall.h>
 6 | #include <unistd.h>
 7 | #include <sched.h>
 8 | #include <linux/futex.h>
 9 | #include <sys/time.h>
10 | #include <errno.h>
11 | #include <stdlib.h>
12 | 
13 | #include <deque>
14 | #include <random>
15 | #include <algorithm>
16 | #include <atomic>
17 | 
18 | typedef uint64_t cycle_t;
19 | 
20 | static inline cycle_t rdtsc(void)
21 | {
22 | 	uint32_t a, d;
23 | 	asm volatile("rdtsc" : "=a" (a), "=d" (d));
24 | 	return ((uint64_t)a) | (((uint64_t)d) << 32);
25 | }
26 | 
27 | static inline cycle_t rdtscp(uint32_t *auxp)
28 | {
29 | 	uint32_t a, d, c;
30 | 	asm volatile("rdtscp" : "=a" (a), "=d" (d), "=c" (c));
31 | 	if (auxp)
32 | 		*auxp = c;
33 | 	return ((uint64_t)a) | (((uint64_t)d) << 32);
34 | }
35 | 
36 | #define N	1000
37 | static cycle_t results[N];
38 | static int nr;
39 | 
40 | int main(int argc, char *argv[])
41 | {
42 | 	cycle_t start, end;
43 | 
44 | 	while (nr < N) {
45 | 		start = rdtsc();
46 | 		end = rdtscp(NULL);
47 | 		if (end - start > 1000)
48 | 			results[nr++] = end - start;
49 | 	}
50 | 
51 |         std::sort(std::begin(results), std::end(results));
52 |         printf("median: %ld 99th: %ld 99.9th: %ld 99.99th: %ld\n",
53 |                results[nr / 2], results[nr * 99 / 100],
54 |                results[nr * 999 / 1000], results[nr * 9999 / 10000]);
55 | 	return 0;
56 | }
57 | 


--------------------------------------------------------------------------------
/server.config:
--------------------------------------------------------------------------------
1 | # an example runtime config file
2 | host_addr 192.168.1.3
3 | host_netmask 255.255.255.0
4 | host_gateway 192.168.1.1
5 | runtime_kthreads 4
6 | runtime_guaranteed_kthreads 4
7 | runtime_priority lc
8 | 


--------------------------------------------------------------------------------
/shim/Makefile:
--------------------------------------------------------------------------------
# Builds libshim.a from every .c file in this directory.
# NOTE(review): CC, CFLAGS and AR are presumably provided by shared.mk
# or make's defaults - confirm.
ROOT_PATH=../
include $(ROOT_PATH)/build/shared.mk

# handy for debugging
# (e.g. `make print-shim_src` echoes the value of shim_src)
print-%  : ; @echo $* = $($*)

# libshim.a - the shenango shim library
shim_src = $(wildcard *.c)
shim_obj = $(shim_src:.c=.o)

# must be first
all: libshim.a

# archive all shim objects into the static library
libshim.a: $(shim_obj)
	$(AR) rcs $@ $^

# general build rules for all targets
src = $(shim_src)
obj = $(src:.c=.o)
dep = $(obj:.o=.d)

# skip dependency generation when only cleaning
ifneq ($(MAKECMDGOALS),clean)
-include $(dep)   # include all dep files in the makefile
endif

# rule to generate a dep file by using the C preprocessor
# (see man cpp for details on the -MM and -MT options)
%.d: %.c
	@$(CC) $(CFLAGS) $< -MM -MT $(@:.d=.o) >$@
# rule to compile a C source file into an object file
%.o: %.c
	$(CC) $(CFLAGS) -c $< -o $@

# remove objects, dependency files and the library
.PHONY: clean
clean:
	rm -f $(obj) $(dep) libshim.a
36 | 


--------------------------------------------------------------------------------
/shim/README:
--------------------------------------------------------------------------------
1 | 
2 | To use, build libshim.a and link the target application against it. Also link the dynamic loader library (-ldl) and pass the linker flag '-Wl,--wrap=main' so that main is wrapped.
3 | Make sure the application doesn't use static initializers for pthread mutexes, etc.
4 | 


--------------------------------------------------------------------------------
/shim/common.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <dlfcn.h>
 4 | #include <base/assert.h>
 5 | #include <base/init.h>
 6 | #include <runtime/sync.h>
 7 | 
 8 | static inline bool shim_active(void)
 9 | {
10 | 	return base_init_done && thread_self() != NULL;
11 | }
12 | 
/* shim_preempt_enable - calls preempt_enable(), but only when the shim is active */
static inline void shim_preempt_enable(void)
{
	if (unlikely(!shim_active()))
		return;

	preempt_enable();
}
18 | 
/* shim_preempt_disable - calls preempt_disable(), but only when the shim is active */
static inline void shim_preempt_disable(void)
{
	if (unlikely(!shim_active()))
		return;

	preempt_disable();
}
24 | 
25 | 
26 | static inline void shim_spin_unlock_np(spinlock_t *l)
27 | {
28 | 	spin_unlock(l);
29 | 	shim_preempt_enable();
30 | }
31 | 
32 | static inline void shim_spin_lock_np(spinlock_t *l)
33 | {
34 | 	shim_preempt_disable();
35 | 	spin_lock(l);
36 | }
37 | 
/*
 * NOTSELF - forwards a call to the next definition of @name when the shim is
 * not active.
 * @name: the function being wrapped (must be the enclosing function's name)
 * @...: the arguments to forward
 *
 * When shim_active() is false, looks up the next symbol called @name with
 * dlsym(RTLD_NEXT), caches it in a function-local static, and RETURNS the
 * result of calling it from the enclosing function. When the shim is active
 * the macro does nothing and execution falls through to the wrapper body.
 *
 * The expansion is a bare `if` block with an embedded `return`, so it must be
 * used as a statement of its own at the top of the wrapper.
 */
#define NOTSELF(name, ...)                                                     \
        if (unlikely(!shim_active())) {                                        \
                static typeof(name) *fn;                                       \
                if (!fn) {                                                     \
                        fn = dlsym(RTLD_NEXT, #name);                          \
                        BUG_ON(!fn);                                           \
                }                                                              \
                return fn(__VA_ARGS__);                                        \
        }
47 | 


--------------------------------------------------------------------------------
/shim/entry.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdio.h>
 3 | 
 4 | #include <runtime/runtime.h>
 5 | 
 6 | int __real_main(int, char **);
 7 | 
 8 | static int main_argc, main_ret;
 9 | static char **main_argv;
10 | 
11 | static void runtime_entry(void *arg)
12 | {
13 | 	main_ret = __real_main(main_argc, main_argv);
14 | }
15 | 
16 | int __weak __wrap_main(int argc, char **argv)
17 | {
18 | 	int ret;
19 | 
20 | 	if (argc < 2) {
21 | 		fprintf(stderr, "Error: missing shenango config argument\n");
22 | 		return 0;
23 | 	}
24 | 
25 | 	char *cfg = argv[1];
26 | 	argv[1] = argv[0];
27 | 	main_argv = &argv[1];
28 | 	main_argc = argc - 1;
29 | 
30 | 	ret = runtime_init(cfg, runtime_entry, NULL);
31 | 	if (ret) {
32 | 		fprintf(stderr, "failed to start runtime\n");
33 | 		return ret;
34 | 	}
35 | 
36 | 	return main_ret;
37 | }
38 | 


--------------------------------------------------------------------------------
/shim/sleep.c:
--------------------------------------------------------------------------------
 1 | #include <time.h>
 2 | 
 3 | #include <base/time.h>
 4 | #include <runtime/thread.h>
 5 | #include <runtime/timer.h>
 6 | 
 7 | #include "common.h"
 8 | 
 9 | int usleep(useconds_t usec)
10 | {
11 | 	NOTSELF(usleep, usec);
12 | 	timer_sleep(usec);
13 | 	return 0;
14 | }
15 | 
16 | unsigned int sleep(unsigned int seconds)
17 | {
18 | 	NOTSELF(sleep, seconds);
19 | 	timer_sleep(seconds * ONE_SECOND);
20 | 	return 0;
21 | }
22 | 
23 | int nanosleep(const struct timespec *req, struct timespec *rem)
24 | {
25 | 	NOTSELF(nanosleep, req, rem);
26 | 
27 | 	timer_sleep(req->tv_sec * ONE_SECOND + req->tv_nsec / 1000);
28 | 
29 | 	if (rem) {
30 | 		rem->tv_sec = 0;
31 | 		rem->tv_nsec = 0;
32 | 	}
33 | 
34 | 	return 0;
35 | }


--------------------------------------------------------------------------------
/tests/.gitignore:
--------------------------------------------------------------------------------
 1 | test_base_gen
 2 | test_base_hello
 3 | test_base_lrpc
 4 | test_base_thread
 5 | test_hello
 6 | test_kthread_attach
 7 | test_kthread_wakeup
 8 | test_many_threads
 9 | test_multiple_runtimes
10 | test_ping
11 | test_runtime_smalloc
12 | test_runtime_threads
13 | test_runtime_mutexes
14 | test_runtime_rcu
15 | test_runtime_timer
16 | test_smalloc
17 | test_thread
18 | test_udp_echo
19 | test_storage
20 | test_storage_iops
21 | netperf
22 | 


--------------------------------------------------------------------------------
/tests/test_base_gen.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_base_gen.c - tests generation numbers
 3 |  */
 4 | 
 5 | #include <base/gen.h>
 6 | #include <base/init.h>
 7 | #include <base/log.h>
 8 | 
/*
 * test_gen - exercises the generation-number API with one writer and one
 * reader that share the same generation word.
 *
 * Each section drives the writer through the transition named in its comment
 * and then checks what the reader reports via gen_in_same_gen().
 *
 * NOTE(review): several sections call gen_in_same_gen() twice in a row with
 * different expectations, so the call presumably updates the reader's state.
 * The order of the statements below is therefore significant - confirm
 * against base/gen.h.
 */
static void test_gen(void)
{
	uint32_t gen;
	struct gen_num gen_writer, gen_reader;

	/* init */
	gen = 0;
	gen_init(&gen_writer, &gen);
	gen_init(&gen_reader, &gen);

	/* no gen -> gen */
	gen_active(&gen_writer);
	/* first check must report a change, the immediate re-check must not */
	BUG_ON(gen_in_same_gen(&gen_reader));
	BUG_ON(!gen_in_same_gen(&gen_reader));

	/* gen -> gen */
	gen_active(&gen_writer);
	BUG_ON(!gen_in_same_gen(&gen_reader));

	/* gen -> no gen -> gen */
	gen_inactive(&gen_writer);
	gen_active(&gen_writer);
	BUG_ON(gen_in_same_gen(&gen_reader));

	/* gen -> no gen */
	gen_inactive(&gen_writer);
	/* both checks must report "not in the same generation" */
	BUG_ON(gen_in_same_gen(&gen_reader));
	BUG_ON(gen_in_same_gen(&gen_reader));

	/* no gen -> no gen */
	gen_inactive(&gen_writer);
	BUG_ON(gen_in_same_gen(&gen_reader));

	/* no gen -> gen -> no gen */
	gen_active(&gen_writer);
	gen_inactive(&gen_writer);
	BUG_ON(gen_in_same_gen(&gen_reader));

	log_debug("success");
}
49 | 
50 | int main(int argc, char *argv[])
51 | {
52 | 	int ret;
53 | 
54 | 	ret = base_init();
55 | 	if (ret) {
56 | 		log_err("base_init() failed, ret = %d", ret);
57 | 		return 1;
58 | 	}
59 | 	BUG_ON(!base_init_done);
60 | 
61 | 	test_gen();
62 | 	return 0;
63 | }
64 | 


--------------------------------------------------------------------------------
/tests/test_base_hello.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_base_hello.c - this test verifies that the base library can initialize
 3 |  */
 4 | 
 5 | #include <base/init.h>
 6 | #include <base/log.h>
 7 | #include <base/assert.h>
 8 | 
 9 | int main(int argc, char *argv[])
10 | {
11 | 	int ret;
12 | 
13 | 	ret = base_init();
14 | 	if (ret) {
15 | 		log_err("base_init() failed, ret = %d", ret);
16 | 		return 1;
17 | 	}
18 | 	BUG_ON(!base_init_done);
19 | 
20 | 	ret = base_init_thread();
21 | 	if (ret) {
22 | 		log_err("base_init_thread() failed, ret = %d", ret);
23 | 		return 1;
24 | 	}
25 | 	BUG_ON(!perthread_read(thread_init_done));
26 | 
27 | 	log_info("hello world!");
28 | 	return 0;
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/test_base_thread.c:
--------------------------------------------------------------------------------
 1 | /*
  2 |  * test_base_thread.c - tests base support for threads
 3 |  */
 4 | 
 5 | #include <pthread.h>
 6 | 
 7 | #include <base/init.h>
 8 | #include <base/log.h>
 9 | #include <base/assert.h>
10 | #include <base/cpu.h>
11 | #include <base/thread.h>
12 | 
13 | #define PERTHREAD_VAL	10
14 | static DEFINE_PERTHREAD(int, blah);
15 | 
16 | static int init_thread(void)
17 | {
18 | 	int ret;
19 | 
20 | 	ret = base_init_thread();
21 | 	if (ret) {
22 | 		log_err("base_init_thread() failed, ret = %d", ret);
23 | 		return 1;
24 | 	}
25 | 	BUG_ON(!perthread_read(thread_init_done));
26 | 	BUG_ON(perthread_get(blah) != 0);
27 | 
28 | 	perthread_get(blah) = PERTHREAD_VAL;
29 | 	BUG_ON(perthread_get(blah) != PERTHREAD_VAL);
30 | 
31 | 	return ret;
32 | }
33 | 
/*
 * test_thread - pthread entry point: initializes the thread and says hello
 * @data: unused
 *
 * Always returns NULL.
 */
static void *test_thread(void *data)
{
	int ret = init_thread();

	BUG_ON(ret);
	log_info("hello thread %d", this_thread_id());
	return NULL;
}
44 | 
45 | int main(int argc, char *argv[])
46 | {
47 | 	pthread_t tid[NCPU];
48 | 	int ret, i;
49 | 
50 | 	ret = base_init();
51 | 	if (ret) {
52 | 		log_err("base_init() failed, ret = %d", ret);
53 | 		return 1;
54 | 	}
55 | 	BUG_ON(!base_init_done);
56 | 	BUG_ON(cpu_count < 1);
57 | 
58 | 	init_thread();
59 | 
60 | 	for (i = 1; i < cpu_count; i++) {
61 | 		ret = pthread_create(&tid[i], NULL, test_thread, NULL);
62 | 		BUG_ON(ret);
63 | 	}
64 | 
65 | 	for (i = 1; i < cpu_count; i++) {
66 | 		ret = pthread_join(tid[i], NULL);
67 | 		BUG_ON(ret);
68 | 	}
69 | 
70 | 	log_info("joined all threads");
71 | 	return 0;
72 | }
73 | 


--------------------------------------------------------------------------------
/tests/test_kthread_wakeup.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_kthread_wakeup.c - tests waking of kthreads
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/log.h>
 9 | #include <base/time.h>
10 | #include <base/atomic.h>
11 | #include <runtime/runtime.h>
12 | #include <runtime/sync.h>
13 | 
/* threshold on n_threads below which a worker spawns more workers */
#define NTHREADS	6
/* total number of worker completions the wait group expects */
#define N		500000
/* a worker spawns rand() % SPAWN_LIMIT new workers */
#define SPAWN_LIMIT	5

/* count of workers spawned and not yet accounted as finished */
static atomic_t n_threads;
/* running total of spawn attempts, capped against N */
static atomic_t n_spawned;
/* signalled once per finished worker */
static waitgroup_t wg;

/*
 * work_handler - body of every worker thread
 * @arg: unused
 *
 * Busy-waits for 100us, then, if fewer than NTHREADS workers are accounted
 * for, spawns a random number of new workers (as long as the total stays
 * within N). Finally signals the shared wait group.
 */
static void work_handler(void *arg)
{
	int i, ret, n_to_spawn;
	waitgroup_t *wg_parent = &wg;

	/* do some busy work */
	delay_us(100);

	if (atomic_read(&n_threads) < NTHREADS) {
		/* we have too few threads, spawn more */
		n_to_spawn = rand() % SPAWN_LIMIT;

		/*
		 * Remove ourselves from the count. If that leaves no worker
		 * accounted for, spawn at least one so the test keeps going.
		 * NOTE(review): assumes atomic_dec_and_test() returns true
		 * when the counter reaches zero - confirm in base/atomic.h.
		 */
		if (atomic_dec_and_test(&n_threads) && n_to_spawn == 0)
			n_to_spawn = 1;

		for (i = 0; i < n_to_spawn; i++) {
			/* only spawn while the running total is within N */
			if (atomic_add_and_fetch(&n_spawned, 1) <= N) {
				atomic_inc(&n_threads);
				ret = thread_spawn(work_handler, NULL);
				BUG_ON(ret);
			}
		}
	} else {
		/* don't spawn any more */
		atomic_dec(&n_threads);
	}
	waitgroup_done(wg_parent);
}
50 | 
51 | static void main_handler(void *arg)
52 | {
53 | 	int i, ret;
54 | 
55 | 	log_info("started main_handler() thread");
56 | 
57 | 	atomic_write(&n_threads, 0);
58 | 	atomic_write(&n_spawned, 0);
59 | 	waitgroup_init(&wg);
60 | 	waitgroup_add(&wg, N);
61 | 	for (i = 0; i < NTHREADS; i++) {
62 | 		atomic_inc(&n_spawned);
63 | 		atomic_inc(&n_threads);
64 | 		ret = thread_spawn(work_handler, NULL);
65 | 		BUG_ON(ret);
66 | 	}
67 | 
68 | 	waitgroup_wait(&wg);
69 | 	log_info("ran %d threads", N);
70 | }
71 | 
72 | int main(int argc, char *argv[])
73 | {
74 | 	int ret;
75 | 
76 | 	if (argc < 2) {
77 | 		printf("arg must be config file\n");
78 | 		return -EINVAL;
79 | 	}
80 | 
81 | 	ret = runtime_init(argv[1], main_handler, NULL);
82 | 	if (ret) {
83 | 		printf("failed to start runtime\n");
84 | 		return ret;
85 | 	}
86 | 
87 | 	return 0;
88 | }
89 | 


--------------------------------------------------------------------------------
/tests/test_many_threads.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | #include <base/stddef.h>
 4 | #include <base/log.h>
 5 | #include <base/time.h>
 6 | #include <runtime/runtime.h>
 7 | #include <runtime/sync.h>
 8 | 
 9 | #define N		50000
10 | #define NCORES	4
11 | 
12 | static void work_handler(void *arg)
13 | {
14 | 	waitgroup_t *wg_parent = (waitgroup_t *)arg;
15 | 	waitgroup_done(wg_parent);
16 | 	waitgroup_wait(wg_parent);
17 | }
18 | 
19 | static void main_handler(void *arg)
20 | {
21 | 	waitgroup_t wg;
22 | 	double threads_per_second;
23 | 	uint64_t start_us;
24 | 	int i, ret;
25 | 
26 | 	log_info("started main_handler() thread");
27 | 
28 | 	waitgroup_init(&wg);
29 | 	waitgroup_add(&wg, N);
30 | 	start_us = microtime();
31 | 	for (i = 0; i < N; i++) {
32 | 		ret = thread_spawn(work_handler, &wg);
33 | 		BUG_ON(ret);
34 | 		thread_yield();
35 | 	}
36 | 
37 | 	waitgroup_wait(&wg);
38 | 	threads_per_second = (double)N /
39 | 			     ((microtime() - start_us) * 0.000001);
40 | 	log_info("spawned %f threads / second", threads_per_second);
41 | }
42 | 
43 | int main(int argc, char *argv[])
44 | {
45 | 	int ret;
46 | 
47 | 	if (argc < 2) {
48 | 		printf("arg must be config file\n");
49 | 		return -EINVAL;
50 | 	}
51 | 
52 | 	ret = runtime_init(argv[1], main_handler, NULL);
53 | 	if (ret) {
54 | 		printf("failed to start runtime");
55 | 		return ret;
56 | 	}
57 | 
58 | 	return 0;
59 | }
60 | 


--------------------------------------------------------------------------------
/tests/test_multiple_runtimes.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_multiple_runtimes.c - tests initialization of multiple runtimes
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | #include <unistd.h>
 7 | 
 8 | #include <base/log.h>
 9 | #include <runtime/runtime.h>
10 | #include <runtime/timer.h>
11 | 
#define N_RUNTIMES	2
#define SLEEP_S		5

/*
 * main_handler - runtime entry point; sleeps for SLEEP_S one-second timer
 * intervals and then logs that the runtime is exiting.
 * @arg: unused
 */
static void main_handler(void *arg)
{
	int remaining = SLEEP_S;

	while (remaining > 0) {
		timer_sleep(1000*1000);
		remaining--;
	}

	log_info("exiting runtime");
}
24 | 
25 | int main(int argc, char *argv[])
26 | {
27 | 	int i, pid, ret;
28 | 
29 | 	if (argc < 1 + N_RUNTIMES) {
30 | 		printf("arg must provide a config file for each runtime\n");
31 | 		return -EINVAL;
32 | 	}
33 | 
34 | 	for (i = 0; i < N_RUNTIMES; i++) {
35 | 		pid = fork();
36 | 		BUG_ON(pid == -1);
37 | 
38 | 		if (pid == 0) {
39 | 			ret = runtime_init(argv[1 + i], main_handler, NULL);
40 | 			BUG_ON(ret < 0);
41 | 		}
42 | 
43 | 		sleep(1);
44 | 	}
45 | 
46 | 	return 0;
47 | }
48 | 


--------------------------------------------------------------------------------
/tests/test_ping.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_ping.c - sends ping echo requests
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | 
 7 | #include <base/log.h>
 8 | #include <net/ping.h>
 9 | #include <runtime/runtime.h>
10 | #include <runtime/timer.h>
11 | 
#define N_PINGS 10
#define DEST_IP_ADDR 3232235778 // 192.168.1.2

/*
 * main_handler - runtime entry point; sends N_PINGS echo requests to
 * DEST_IP_ADDR, one per second, using the loop index as sequence number.
 * @arg: unused
 */
static void main_handler(void *arg)
{
	int seq;

	if (net_ping_init() != 0) {
		log_err("failed to init ping");
		return;
	}

	seq = 0;
	while (seq < N_PINGS) {
		net_send_ping(seq, DEST_IP_ADDR);

		/* wait 1 second before sending next ping */
		timer_sleep(1000*1000);
		seq++;
	}
}
32 | 
33 | int main(int argc, char *argv[])
34 | {
35 | 	int ret;
36 | 
37 | 	if (argc < 2) {
38 | 		printf("arg must be config file\n");
39 | 		return -EINVAL;
40 | 	}
41 | 
42 | 	ret = runtime_init(argv[1], main_handler, NULL);
43 | 	if (ret) {
44 | 		printf("failed to start runtime\n");
45 | 		return ret;
46 | 	}
47 | 
48 | 	return 0;
49 | }
50 | 


--------------------------------------------------------------------------------
/tests/test_runtime_threads.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_runtime_threads.c - tests basic thread spawning
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/log.h>
 9 | #include <base/time.h>
10 | #include <runtime/runtime.h>
11 | #include <runtime/sync.h>
12 | 
13 | #define N		1000000
14 | #define NCORES		4
15 | 
16 | static void leaf_handler(void *arg)
17 | {
18 | 	waitgroup_t *wg_parent = (waitgroup_t *)arg;
19 | 	delay_us(1);
20 | 	waitgroup_done(wg_parent);
21 | }
22 | 
23 | static void work_handler(void *arg)
24 | {
25 | 	waitgroup_t *wg_parent = (waitgroup_t *)arg;
26 | 	waitgroup_t wg;
27 | 	int i, ret;
28 | 
29 | 	waitgroup_init(&wg);
30 | 	waitgroup_add(&wg, N);
31 | 	for (i = 0; i < N; i++) {
32 | 		ret = thread_spawn(leaf_handler, &wg);
33 | 		BUG_ON(ret);
34 | 		thread_yield();
35 | 	}
36 | 
37 | 	waitgroup_wait(&wg);
38 | 	waitgroup_done(wg_parent);
39 | }
40 | 
41 | static void main_handler(void *arg)
42 | {
43 | 	waitgroup_t wg;
44 | 	double threads_per_second;
45 | 	uint64_t start_us;
46 | 	int i, ret;
47 | 
48 | 	log_info("started main_handler() thread");
49 | 	log_info("creating threads with 1us of fake work.");
50 | 
51 | 	waitgroup_init(&wg);
52 | 	waitgroup_add(&wg, NCORES);
53 | 	start_us = microtime();
54 | 	for (i = 0; i < NCORES; i++) {
55 | 		ret = thread_spawn(work_handler, &wg);
56 | 		BUG_ON(ret);
57 | 	}
58 | 
59 | 	waitgroup_wait(&wg);
60 | 	threads_per_second = (double)(NCORES * N) /
61 | 			     ((microtime() - start_us) * 0.000001);
62 | 	log_info("spawned %f threads / second, efficiency %f",
63 | 		 threads_per_second, threads_per_second / 1000000);
64 | }
65 | 
66 | int main(int argc, char *argv[])
67 | {
68 | 	int ret;
69 | 
70 | 	if (argc < 2) {
71 | 		printf("arg must be config file\n");
72 | 		return -EINVAL;
73 | 	}
74 | 
75 | 	ret = runtime_init(argv[1], main_handler, NULL);
76 | 	if (ret) {
77 | 		printf("failed to start runtime\n");
78 | 		return ret;
79 | 	}
80 | 
81 | 	return 0;
82 | }
83 | 


--------------------------------------------------------------------------------
/tests/test_runtime_timer.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_runtime_timer.c - tests timer sleep throughput
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | 
 7 | #include <base/stddef.h>
 8 | #include <base/log.h>
 9 | #include <base/time.h>
10 | #include <runtime/runtime.h>
11 | #include <runtime/sync.h>
12 | #include <runtime/timer.h>
13 | 
14 | #define WORKERS		1000
15 | #define N		100000
16 | 
17 | static void work_handler(void *arg)
18 | {
19 | 	waitgroup_t *wg_parent = (waitgroup_t *)arg;
20 | 	int i;
21 | 
22 | 	for (i = 0; i < N; i++)
23 | 		timer_sleep(2);
24 | 
25 | 	waitgroup_done(wg_parent);
26 | }
27 | 
28 | static void main_handler(void *arg)
29 | {
30 | 	waitgroup_t wg;
31 | 	double timeouts_per_second;
32 | 	uint64_t start_us;
33 | 	int i, ret;
34 | 
35 | 	log_info("started main_handler() thread");
36 | 
37 | 	waitgroup_init(&wg);
38 | 	waitgroup_add(&wg, WORKERS);
39 | 	start_us = microtime();
40 | 	for (i = 0; i < WORKERS; i++) {
41 | 		ret = thread_spawn(work_handler, &wg);
42 | 		BUG_ON(ret);
43 | 	}
44 | 
45 | 	waitgroup_wait(&wg);
46 | 	timeouts_per_second = (double)(WORKERS * N) /
47 | 		((microtime() - start_us) * 0.000001);
48 | 	log_info("handled %f timeouts / second", timeouts_per_second);
49 | }
50 | 
51 | int main(int argc, char *argv[])
52 | {
53 | 	int ret;
54 | 
55 | 	if (argc < 2) {
56 | 		printf("arg must be config file\n");
57 | 		return -EINVAL;
58 | 	}
59 | 
60 | 	ret = runtime_init(argv[1], main_handler, NULL);
61 | 	if (ret) {
62 | 		printf("failed to start runtime\n");
63 | 		return ret;
64 | 	}
65 | 
66 | 	return 0;
67 | }
68 | 


--------------------------------------------------------------------------------
/tests/test_storage.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_storage.c - writes and reads to the storage device
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | #include <time.h>
 7 | 
 8 | #include <base/log.h>
 9 | #include <runtime/runtime.h>
10 | #include <runtime/storage.h>
11 | #include <runtime/timer.h>
12 | #include <runtime/sync.h>
13 | 
/*
 * main_handler - runtime entry point
 * @arg: unused
 *
 * Writes a greeting to block 0 of the storage device, overwrites the local
 * buffer, reads block 0 back and logs what was read. The test is skipped
 * when the reported block size is 0. The buffer is released on every path.
 */
static void main_handler(void *arg)
{
	int ret;
	uint32_t block_size;
	char *buf;

	block_size = storage_block_size();
	log_info("num blocks: %lu", storage_num_blocks());
	log_info("block size: %u", block_size);
	if (block_size == 0) {
		log_info("storage support is disabled, skipping test");
		return;
	}

	buf = malloc(block_size);
	BUG_ON(!buf);

	log_info("writing 'hello world' to device...");
	sprintf(buf, "hello world");
	ret = storage_write(buf, 0, 1);
	if (ret) {
		log_err("failed to write");
		goto out;
	}
	/* clobber the buffer so that a successful read is observable */
	sprintf(buf, "cleared");

	log_debug("reading from device...");
	ret = storage_read(buf, 0, 1);
	if (ret) {
		log_err("failed to read");
		/* the buffer does not hold device data; do not report it */
		goto out;
	}
	log_info("data read: %s", buf);

out:
	free(buf);
}
46 | 
47 | int main(int argc, char *argv[])
48 | {
49 | 	int ret;
50 | 
51 | 	ret = runtime_init(argv[1], main_handler, NULL);
52 | 	if (ret) {
53 | 		log_err("failed to start runtime");
54 | 		return ret;
55 | 	}
56 | 	return 0;
57 | }
58 | 


--------------------------------------------------------------------------------
/tests/test_storage_iops.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * test_storage_iops.c - tests write IOPS for storage device using shenango runtime
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | 
 7 | #include <base/atomic.h>
 8 | #include <base/stddef.h>
 9 | #include <base/log.h>
10 | #include <base/time.h>
11 | #include <runtime/runtime.h>
12 | #include <runtime/thread.h>
13 | #include <runtime/sync.h>
14 | #include <runtime/timer.h>
15 | #include <runtime/storage.h>
16 | 
17 | 
18 | #define WORKERS		100
19 | #define N		100000
20 | 
21 | static void work_handler(void *arg)
22 | {
23 | 	static atomic_t thread_counter;
24 | 	waitgroup_t *wg_parent = (waitgroup_t *)arg;
25 | 	int i, tid;
26 | 	char *p;
27 | 
28 | 
29 | 	p = malloc(4096);
30 | 	BUG_ON(!p);
31 | 
32 | 	tid = atomic_fetch_and_add(&thread_counter, 1);
33 | 
34 | 	for (i = 0; i < N; i++)
35 | 		BUG_ON(storage_write(p, 8 * (tid * N + i), 8));
36 | 
37 | 	waitgroup_done(wg_parent);
38 | }
39 | 
40 | static void main_handler(void *arg)
41 | {
42 | 	waitgroup_t wg;
43 | 	double iops;
44 | 	uint64_t start_us;
45 | 	int i, ret;
46 | 
47 | 	log_info("started main_handler() thread");
48 | 
49 | 	BUG_ON(8 * (N + 1) * WORKERS > storage_num_blocks());
50 | 
51 | 	waitgroup_init(&wg);
52 | 	waitgroup_add(&wg, WORKERS);
53 | 	start_us = microtime();
54 | 	for (i = 0; i < WORKERS; i++) {
55 | 		ret = thread_spawn(work_handler, &wg);
56 | 		BUG_ON(ret);
57 | 	}
58 | 
59 | 	waitgroup_wait(&wg);
60 | 	iops = (double)(WORKERS * N) /
61 | 		((microtime() - start_us) * 0.000001);
62 | 	log_info("handled %f IOPS", iops);
63 | }
64 | 
65 | int main(int argc, char *argv[])
66 | {
67 | 	int ret;
68 | 
69 | 	if (argc < 2) {
70 | 		printf("arg must be config file\n");
71 | 		return -EINVAL;
72 | 	}
73 | 
74 | 	ret = runtime_init(argv[1], main_handler, NULL);
75 | 	if (ret) {
76 | 		printf("failed to start runtime\n");
77 | 		return ret;
78 | 	}
79 | 
80 | 	return 0;
81 | }
82 | 


--------------------------------------------------------------------------------
/victim.config:
--------------------------------------------------------------------------------
 1 | # an example runtime config file
 2 | host_addr 192.168.1.8
 3 | host_netmask 255.255.255.0
 4 | host_gateway 192.168.1.1
 5 | runtime_kthreads 10
 6 | runtime_guaranteed_kthreads 10
 7 | runtime_spinning_kthreads 0
 8 | runtime_priority lc
 9 | runtime_ht_punish_us 100
10 | runtime_qdelay_us 10
11 | 


--------------------------------------------------------------------------------