├── .gitignore
├── Makefile
├── README.md
├── balance.txt
├── cloudlab
    ├── README.md
    ├── bash_profile
    ├── bashrc
    ├── bin
    │   ├── ckill
    │   ├── config
    │   ├── install_homa
    │   ├── on_nodes
    │   ├── set_cutoffs
    │   ├── switch.py
    │   └── update_linux
    ├── config_switch
    ├── gdbinit
    └── update
├── dissector
    ├── CMakeLists.txt
    ├── README.md
    └── homa.c
├── homa.h
├── homa_api.c
├── homa_grant.c
├── homa_grant.h
├── homa_impl.h
├── homa_incoming.c
├── homa_metrics.c
├── homa_metrics.h
├── homa_offload.c
├── homa_offload.h
├── homa_outgoing.c
├── homa_peer.c
├── homa_peer.h
├── homa_plumbing.c
├── homa_pool.c
├── homa_pool.h
├── homa_receiver.cc
├── homa_receiver.h
├── homa_rpc.c
├── homa_rpc.h
├── homa_skb.c
├── homa_skb.h
├── homa_sock.c
├── homa_sock.h
├── homa_stub.h
├── homa_timer.c
├── homa_utils.c
├── homa_wire.h
├── man
    ├── Makefile
    ├── homa.7
    ├── homa_abort.3
    ├── homa_reply.3
    ├── homa_send.3
    ├── recvmsg.2
    └── sendmsg.2
├── notes.txt
├── perf.txt
├── perf
    ├── README.md
    ├── plot_length_cdf.py
    └── rtt.xlsx
├── protocol.md
├── reap.txt
├── rsync-exclude.txt
├── sync.txt
├── test
    ├── Makefile
    ├── README.md
    ├── ccutils.cc
    ├── ccutils.h
    ├── kselftest_harness.h
    ├── main.c
    ├── mergedep.pl
    ├── mock.c
    ├── mock.h
    ├── unit_homa_grant.c
    ├── unit_homa_incoming.c
    ├── unit_homa_metrics.c
    ├── unit_homa_offload.c
    ├── unit_homa_outgoing.c
    ├── unit_homa_peer.c
    ├── unit_homa_plumbing.c
    ├── unit_homa_pool.c
    ├── unit_homa_rpc.c
    ├── unit_homa_skb.c
    ├── unit_homa_sock.c
    ├── unit_homa_timer.c
    ├── unit_homa_utils.c
    ├── unit_timetrace.c
    ├── utils.c
    └── utils.h
├── timetrace.c
├── timetrace.h
└── util
    ├── Makefile
    ├── README.md
    ├── avg.py
    ├── buffer_client.c
    ├── buffer_server.c
    ├── cp_basic
    ├── cp_both
    ├── cp_buffers
    ├── cp_client_threads
    ├── cp_config
    ├── cp_config_buf
    ├── cp_load
    ├── cp_mtu
    ├── cp_node.cc
    ├── cp_server_ports
    ├── cp_tcp
    ├── cp_tcp_config
    ├── cp_vs_tcp
    ├── cperf.py
    ├── diff_metrics.py
    ├── diff_rtts.py
    ├── dist.cc
    ├── dist.h
    ├── dist_test.cc
    ├── dist_to_proto.cc
    ├── get_time_trace.c
    ├── get_traces
    ├── homa_prio.cc
    ├── homa_test.cc
    ├── inc_tput.cc
    ├── metrics.py
    ├── plot.py
    ├── plot_tthoma.py
    ├── receive_raw.c
    ├── rpcid.py
    ├── scratch.c
    ├── send_many
    ├── send_raw.c
    ├── server.cc
    ├── service.py
    ├── smi.cc
    ├── smi.py
    ├── strip.py
    ├── test_time_trace.c
    ├── test_utils.cc
    ├── test_utils.h
    ├── time_trace.cc
    ├── time_trace.h
    ├── tput.py
    ├── ttgrep.py
    ├── tthoma.py
    ├── ttmerge.py
    ├── ttoffset.py
    ├── ttprint.py
    ├── ttrange.py
    ├── ttsum.py
    ├── ttsync.py
    ├── ttsyslog.py
    └── use_memory.c


/.gitignore:
--------------------------------------------------------------------------------
 1 | .*.swp
 2 | \#*#
 3 | *.pyc
 4 | *.o
 5 | *.hi
 6 | *.dump
 7 | *.log
 8 | *.rej
 9 | *.orig
10 | *.patch
11 | *.diff
12 | .tags*
13 | .deps
14 | *.pdf
15 | *.tt
16 | *.out
17 | 
18 | # Ignore IDE files
19 | /.idea/
20 | /nbproject/
21 | 
22 | reports/
23 | traces/
24 | bytedance/
25 | saved_traces/


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile to build Homa as a Linux module.
 2 | 
 3 | HOMA_OBJS := homa_grant.o \
 4 | 	homa_incoming.o \
 5 | 	homa_metrics.o \
 6 | 	homa_offload.o \
 7 | 	homa_outgoing.o \
 8 | 	homa_peer.o \
 9 | 	homa_pool.o \
10 | 	homa_plumbing.o \
11 | 	homa_rpc.o \
12 | 	homa_skb.o \
13 | 	homa_sock.o \
14 | 	homa_timer.o \
15 | 	homa_utils.o \
16 | 	timetrace.o
17 | 
18 | # KERNELRELEASE is non-empty when this file is read by the kernel build
19 | # system (kbuild); in that case only the module description is needed.
20 | ifneq ($(KERNELRELEASE),)
21 | 
22 | obj-m += homa.o
23 | homa-y = $(HOMA_OBJS)
24 | 
25 | MY_CFLAGS += -g
26 | ccflags-y += ${MY_CFLAGS}
27 | CC += ${MY_CFLAGS}
28 | 
29 | else
30 | 
31 | ifneq ($(KERNEL_SRC),)
32 | # alternatively to variable KDIR accept variable KERNEL_SRC as used in
33 | # PetaLinux/Yocto for example
34 | KDIR ?= $(KERNEL_SRC)
35 | endif
36 | 
37 | KDIR ?= /lib/modules/$(shell uname -r)/build
38 | 
39 | # These targets are commands rather than files; declaring them phony keeps
40 | # a stray file with the same name from making them look up to date.
41 | .PHONY: all install check net-next clean
42 | 
43 | # The strip.py rules below create their target through shell redirection,
44 | # so a failed run would otherwise leave a truncated but newer file behind.
45 | .DELETE_ON_ERROR:
46 | 
47 | all:
48 | 	$(MAKE) -C $(KDIR) M=$(shell pwd) modules
49 | 
50 | install:
51 | 	$(MAKE) -C $(KDIR) M=$(shell pwd) modules_install
52 | 
53 | check:
54 | 	../homaLinux/scripts/kernel-doc -none *.c
55 | 
56 | # Copy stripped source files to a Linux source tree
57 | LINUX_SRC_DIR ?= ../net-next
58 | HOMA_TARGET ?= $(LINUX_SRC_DIR)/net/homa
59 | CP_HDRS := homa_impl.h \
60 | 	   homa_peer.h \
61 | 	   homa_pool.h \
62 | 	   homa_rpc.h \
63 | 	   homa_sock.h \
64 | 	   homa_stub.h \
65 | 	   homa_wire.h
66 | CP_SRCS := $(patsubst %.o,%.c,$(filter-out timetrace.o, $(HOMA_OBJS)))
67 | CP_TARGETS := $(patsubst %,$(HOMA_TARGET)/%,$(CP_HDRS) $(CP_SRCS))
68 | net-next: $(CP_TARGETS) $(LINUX_SRC_DIR)/include/uapi/linux/homa.h
69 | $(HOMA_TARGET)/%: % util/strip.py
70 | 	util/strip.py $< > $@
71 | $(LINUX_SRC_DIR)/include/uapi/linux/homa.h: homa.h util/strip.py
72 | 	util/strip.py $< > $@
73 | 
74 | clean:
75 | 	$(MAKE) -C $(KDIR) M=$(shell pwd) clean
76 | 
77 | # The following targets are useful for debugging Makefiles; they
78 | # print the value of a make variable in one of several contexts.
79 | print-%:
80 | 	@echo $* = $($*)
81 | 
82 | printBuild-%:
83 | 	$(MAKE) -C $(KDIR) M=$(shell pwd) $@
84 | 
85 | printClean-%:
86 | 	$(MAKE) -C $(KDIR) M=$(shell pwd) $@
87 | 
88 | endif
89 | 


--------------------------------------------------------------------------------
/balance.txt:
--------------------------------------------------------------------------------
  1 | This file discusses the issue of load-balancing in Homa.
  2 | 
  3 | In order to keep up with fast networks, transport protocols must distribute
  4 | their processing across multiple cores. For outgoing packets this happens
  5 | naturally: sending threads run on different cores and packet processing
  6 | for outbound packets happens on the same core as the sending thread. Things
  7 | are more difficult for incoming packets. In general, an incoming packet
  8 | will pass through 3 cores:
  9 | * NAPI/GRO: the NIC distributes incoming packets across cores using RSS.
 10 |   The number of incoming channels, and their association with cores, can
 11 |   be configured in software. The NIC will then distribute packets across
 12 |   those channels using a hash based on packet header fields. The device
 13 |   driver receives packets as part of NAPI, then packets are collected into
 14 |   batches using GRO and handed off to SoftIRQ.
 15 | * SoftIRQ processing occurs on a (potentially) different core from NAPI/GRO;
 16 |   the network stack runs here, including Homa's main handlers for incoming
 17 |   packets. The system default is to compute another hash function on packet
 18 | headers to select a SoftIRQ core for a batch, but it is possible for GRO
 19 |   to make its own choice of core, and Homa does this.
 20 | * Once a complete message is received, it is handed off to an application
 21 |   thread, which typically runs on a different core.
 22 | 
 23 | The load balancing challenge is to distribute load across multiple cores
 24 | without overloading any individual core ("hotspots"). This has proven
 25 | quite difficult, and hotspots are the primary source of tail latency in Homa.
 26 | The most common cause of hotspots is when 2 or more of the above tasks
 27 | are assigned to the same core. For example:
 28 | * Two batches from different NAPI/GRO cores might get assigned to the same
 29 |   SoftIRQ core.
 30 | * A particular core might be very busy handling NAPI/GRO for a stream of
 31 |   packets in a large message; this will prevent application threads from
 32 |   making progress on that core. A short message might pass through other
 33 |   cores for NAPI/GRO and SoftIRQ, but if its application is running on
 34 |   the busy core, then it will not be able to process the short message.
 35 | 
 36 | Part of the problem is that core assignments are made independently by
 37 | 3 different schedulers (RSS for the NAPI/GRO core, GRO or the system for
 38 | the SoftIRQ core, and the Linux scheduler for the application core),
 39 | so conflicts are likely to occur. Only one of these schedulers is under
 40 | control of the transport protocol.
 41 | 
 42 | It's also important to note that using more cores isn't always the best
 43 | approach. For example, if a node is lightly loaded, it would be best to
 44 | do all RX processing on a single core: using multiple cores causes extra
 45 | cache misses as data migrates from core to core, and it also adds latency
 46 | to pass control between cores. In an ideal world, the number of cores used for
 47 | protocol processing would be just enough to keep any of them from getting
 48 | overloaded. However, it appears to be hard to vary the number of cores
 49 | without risking overloads; except in a few special cases, Homa doesn't do
 50 | this.
 51 | 
 52 | Homa tries to use its control over SoftIRQ scheduling to minimize hotspots.
 53 | Several different approaches have been tried over time; this document
 54 | focuses on the two most recent ones, which are called "Gen2" and "Gen3".
 55 | 
 56 | Gen2 Load Balancing
 57 | -------------------
 58 | * Gen2 assumes that NAPI/GRO processing is occurring on all cores.
 59 | * When GRO chooses where to assign a batch of packets for SoftIRQ, it
 60 |   considers the next several cores (in ascending circular core order
 61 |   after the GRO core).
 62 | * GRO uses several criteria to try to find a "good" core for SoftIRQ, such
 63 |   as avoiding a core that has done recent GRO processing, or one for which
 64 |   there is already pending SoftIRQ work.
 65 | * Selection stops as soon as it finds a "good" core.
 66 | * If no "good" core is found, then GRO will rotate among the successor
 67 |   cores on a batch-by-batch basis.
 68 | * In some cases, Gen2 will bypass the SoftIRQ handoff mechanism and simply
 69 |   run SoftIRQ immediately on its core. This is done in two cases: short
 70 |   packets and grant packets. Bypass is particularly useful for grants
 71 |   because it eliminates the latency associated with a handoff, and grant
 72 |   turnaround time is important for overall performance.
 73 | 
 74 | Gen2 has several problems:
 75 | * It doesn't do anything about the problem of application threads conflicting
 76 |   with NAPI/GRO or SoftIRQ.
 77 | * A single core may be assigned both SoftIRQ and NAPI/GRO work at the
 78 |   same time.
 79 | * The SoftIRQ core groups for different NAPI/GRO cores overlap, so it's
 80 |   possible for multiple GROs to schedule batches to the same SoftIRQ core.
 81 | * When receiving packets from a large message, Gen2 tends to alternate between
 82 |   2 or more SoftIRQ cores, which results in unnecessary cache coherency
 83 |   traffic.
 84 | * If the NAPI/GRO core is overloaded, bypass can make things worse (especially
 85 |   since grant processing results in transmitting additional packets, which
 86 |   is fairly expensive).
 87 | 
 88 | Gen3 Load Balancing
 89 | -------------------
 90 | The Gen3 load-balancing mechanism is an attempt to solve the problems
 91 | associated with Gen2.
 92 | * The number of channels is reduced, so that only 1/4 of the cores do
 93 |   NAPI/GRO processing. This appears to be sufficient capacity to avoid
 94 |   overloads on any of the NAPI/GRO cores.
 95 | * Each NAPI/GRO core has 3 other cores (statically assigned) that it can use
 96 |   for SoftIRQ processing. The SoftIRQ core groups for different NAPI/GRO
 97 |   cores do not overlap. This means that SoftIRQ and GRO will never happen
 98 |   simultaneously on the same core, and there will be no conflicts between
 99 |   the SoftIRQ groups of different NAPI/GRO cores.
100 | * Gen3 takes steps to avoid core conflicts between application threads and
101 |   NAPI/GRO and SoftIRQ processing, as described below.
102 | * When an application thread is using Homa actively on a core, the core
103 |   is marked as "busy". When GRO selects a SoftIRQ core, it attempts to
104 |   avoid cores that are busy with application threads. If there is a choice
105 |   of un-busy cores, GRO will try to reuse a single SoftIRQ core over and over.
106 | * Homa also keeps track of recent NAPI/GRO and SoftIRQ processing on each
107 |   core. When an incoming message becomes ready and there are multiple threads
108 |   waiting for messages, Homa tries to pick a thread whose core has not had
109 |   recent Homa activity.
110 | * Between these two mechanisms, the hope is that SoftIRQ and application
111 |   work will adjust their core assignments to avoid conflicts.
112 | 
113 | Gen3 was implemented in November of 2023; so far its performance appears to be
114 | about the same as Gen2 (slightly worse for W2 and W3, slightly better for W5).
115 | Gen3 performance on W3 appears highly variable: P99 latency can vary by 5-10x
116 | from run to run; as of December 2023 the reasons for this have not been
117 | determined.


--------------------------------------------------------------------------------
/cloudlab/README.md:
--------------------------------------------------------------------------------
1 | This directory contains scripts and other supporting files for testing
2 | Homa on the CloudLab cluster. Miscellaneous notes:
3 | 
4 | - Everything in the bin directory will be copied to ~/bin on CloudLab.
5 | 


--------------------------------------------------------------------------------
/cloudlab/bash_profile:
--------------------------------------------------------------------------------
 1 | export LANG=C
 2 | export VISUAL=vi
 3 | 
 4 | # The following lines arrange for the current git branch to
 5 | # appear in the shell prompt.
 6 | 
 7 | # Prints " [branch]" when "git symbolic-ref" succeeds, "[detached]" when
 8 | # it exits with status 1, and nothing for any other status.
 9 | parse_git_branch() {
10 |   ref=$(git symbolic-ref HEAD -q 2>/dev/null)
11 |   st=$?
12 |   if [ $st -eq 1 ]; then
13 |     echo "[detached]"
14 |   elif [ $st -eq 0 ]; then
15 |     echo " [${ref#refs/heads/}]"
16 |   fi
17 | }
18 | 
19 | # The "\$(...)" form keeps the command substitution unexpanded in PS1, so
20 | # bash runs parse_git_branch each time it displays the prompt. The window
21 | # title uses the same form so that it follows branch changes too, rather
22 | # than being fixed to the branch seen when this file was read.
23 | PS1="${debian_chroot:+($debian_chroot)}\u@\h:\w\$(parse_git_branch)\$ "
24 | XTERM_TITLE="\w\$(parse_git_branch)"
25 | PS1="\n\[\e]0;$XTERM_TITLE\a\]\[\e[1;32m\]$PS1\[\e[1;37m\]"
26 | 
27 | . ~/.bashrc
28 | 


--------------------------------------------------------------------------------
/cloudlab/bashrc:
--------------------------------------------------------------------------------
  1 | # ~/.bashrc: executed by bash(1) for non-login shells.
  2 | # see /usr/share/doc/bash/examples/startup-files (in the package bash-doc)
  3 | # for examples
  4 | 
  5 | PATH=/opt/gradle-7.3/bin:/$USER/install/bin:~/homaModule/util:~/homaModule/perf:~/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
  6 | 
  7 | export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib/x86_64-linux-gnu
  8 | 
  9 | export PYTHONPATH=/users/$USER/homaModule/util:/users/$USER/bin
 10 | 
 11 | export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
 12 | 
 13 | # If not running interactively, don't do anything
 14 | [ -z "$PS1" ] && return
 15 | 
 16 | # don't put duplicate lines in the history. See bash(1) for more options
 17 | # don't overwrite GNU Midnight Commander's setting of `ignorespace'.
 18 | export HISTCONTROL=$HISTCONTROL${HISTCONTROL+,}ignoredups
 19 | # ... or force ignoredups and ignorespace
 20 | export HISTCONTROL=ignoreboth
 21 | 
 22 | # append to the history file, don't overwrite it
 23 | shopt -s histappend
 24 | 
 25 | # for setting history length see HISTSIZE and HISTFILESIZE in bash(1)
 26 | 
 27 | # check the window size after each command and, if necessary,
 28 | # update the values of LINES and COLUMNS.
 29 | shopt -s checkwinsize
 30 | 
 31 | # make less more friendly for non-text input files, see lesspipe(1)
 32 | [ -x /usr/bin/lesspipe ] && eval "$(SHELL=/bin/sh lesspipe)"
 33 | 
 34 | # set variable identifying the chroot you work in (used in the prompt below)
 35 | if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
 36 |     debian_chroot=$(cat /etc/debian_chroot)
 37 | fi
 38 | 
 39 | # set a fancy prompt (non-color, unless we know we "want" color)
 40 | case "$TERM" in
 41 |     xterm-color) color_prompt=yes;;
 42 | esac
 43 | 
 44 | # uncomment for a colored prompt, if the terminal has the capability; turned
 45 | # off by default to not distract the user: the focus in a terminal window
 46 | # should be on the output of commands, not on the prompt
 47 | force_color_prompt=yes
 48 | 
 49 | if [ -n "$force_color_prompt" ]; then
 50 |     if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then
 51 | 	# We have color support; assume it's compliant with Ecma-48
 52 | 	# (ISO/IEC-6429). (Lack of such support is extremely rare, and such
 53 | 	# a case would tend to support setf rather than setaf.)
 54 | 	color_prompt=yes
 55 |     else
 56 | 	color_prompt=
 57 |     fi
 58 | fi
 59 | 
 60 | # If this is an xterm set the title to user@host:dir
 61 | case "$TERM" in
 62 | xterm*|rxvt*)
 63 |     PS1="\[\e]0;${debian_chroot:+($debian_chroot)}\u@\h: \w\a\]$PS1"
 64 |     ;;
 65 | *)
 66 |     ;;
 67 | esac
 68 | 
 69 | # Alias definitions.
 70 | # You may want to put all your additions into a separate file like
 71 | # ~/.bash_aliases, instead of adding them here directly.
 72 | # See /usr/share/doc/bash-doc/examples in the bash-doc package.
 73 | 
 74 | #if [ -f ~/.bash_aliases ]; then
 75 | #    . ~/.bash_aliases
 76 | #fi
 77 | 
 78 | # enable color support of ls and also add handy aliases
 79 | if [ -x /usr/bin/dircolors ]; then
 80 |     eval "`dircolors -b`"
 81 |     alias ls='ls --color=auto'
 82 |     alias dir='dir --color=auto'
 83 |     alias vdir='vdir --color=auto'
 84 | 
 85 |     alias grep='grep --color=auto'
 86 |     alias fgrep='fgrep --color=auto'
 87 |     alias egrep='egrep --color=auto'
 88 | fi
 89 | 
 90 | # some more ls aliases
 91 | alias ll='ls -l'
 92 | alias la='ls -A'
 93 | #alias l='ls -CF'
 94 | alias jdebug='java -Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=9000'
 95 | 
 96 | # enable programmable completion features (you don't need to enable
 97 | # this, if it's already enabled in /etc/bash.bashrc and /etc/profile
 98 | # sources /etc/bash.bashrc).
 99 | if [ -f /etc/bash_completion ]; then
100 |     . /etc/bash_completion
101 | fi
102 | 
103 | alias gitmods="git status --short | awk '{print(\$2);}'"
104 | 
105 | alias makemore='make 2>&1 | more'
106 | 
107 | id ()
108 | {
109 |     ttgrep.py "id $1" node.tt
110 | }
111 | 
112 | cpid()
113 | {
114 |     ttgrep.py "id $1" cp.tt
115 | }
116 | 
117 | core ()
118 | {
119 |     ttgrep.py "[C$1]" node.tt > core.tt
120 |     less +G core.tt
121 | }
122 | 
123 | thread ()
124 | {
125 |     ttgrep.py "[$1]" cp.tt > thread.tt
126 |     less +G thread.tt
127 | }
128 | 
129 | cid ()
130 | {
131 |     ttgrep.py "cid $1" cp.tt > cid.tt
132 |     less +G cid.tt
133 | }
134 | 
135 | pid ()
136 | {
137 |     ttgrep.py "pid $1" $2 > pid.tt
138 |     less +G pid.tt
139 | }
140 | 
141 | export GRPC_VERBOSITY=INFO


--------------------------------------------------------------------------------
/cloudlab/bin/ckill:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (c) 2020-2023 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # Kill processes with a given name on a cluster of machines.
 7 | #
 8 | # Usage:
 9 | # ckill name num_nodes [first]
10 | #
11 | # "name" gives the name of the process to be killed (suitable as an
12 | # argument to "pidof"). The "num_nodes" argument indicates how many
13 | # servers should be examined for processes to kill. The "first" argument
14 | # is optional; it is an integer identifying the first node on which
15 | # killing will occur (e.g. "ckill cp_node 4 2" means node2 through node5
16 | # will be searched). "first" defaults to 0.
17 | 
18 | root=~/homaModule
19 | 
20 | if [ $# -eq 3 ]; then
21 |     first=$3
22 | elif [ $# -eq 2 ]; then
23 |     first=0
24 | else
25 |     echo "Usage: ckill name num_nodes [first]"
26 |     exit 1
27 | fi
28 | last=`expr $first + $2 - 1`
29 | 
30 | for ((i = $first ; i <= $last; i++)); do
31 |     node=node$i
32 |     echo "ssh node$i 'kill \`pidof $1\`'"
33 |     ssh node$i "kill \`pidof $1\`"
34 | done


--------------------------------------------------------------------------------
/cloudlab/bin/install_homa:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (c) 2020-2023 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # This script installs all of the files needed to run Homa tests on one
 7 | # or more target machines; it also loads the Homa kernel module.
 8 | #
 9 | # Usage:
10 | # install_homa [--net-next] num_nodes [first]
11 | #
12 | # The "num_nodes" argument indicates how many servers should be updated.
13 | # The "first" argument is optional; it is an integer identifying the
14 | # first node on which installation will occur (e.g. "install_homa 4 2" means
15 | # node2 through node5 will be updated). "first" defaults to 0.
16 | # This script assumes that the Homa module binary (homa.ko) has already
17 | # been built. If --net-next is specified, it will be in the kernel build
18 | # directory (see code below for path), otherwise it will be in ~/homaModule.
19 | # In addition, the utility programs in ~/homaModule/util must have been built.
20 | 
21 | root=~/homaModule
22 | 
23 | set -e
24 | 
25 | # Default to the module built in ~/homaModule; --net-next selects the one
26 | # built in the net-next kernel tree instead.
27 | homa_ko=$root/homa.ko
28 | # "$1" is quoted so that running with no arguments falls through to the
29 | # usage message instead of first triggering a "unary operator expected"
30 | # error from the test.
31 | if [ "$1" = "--net-next" ]; then
32 |     homa_ko=/netnext/net-next/net/homa/homa.ko
33 |     shift
34 | fi
35 | if [ $# -eq 2 ]; then
36 |     first=$2
37 | elif [ $# -eq 1 ]; then
38 |     first=0
39 | else
40 |     echo "Usage: install_homa [--net-next] num_nodes [first]"
41 |     exit 1
42 | fi
43 | # expr exits with status 1 when its result is 0 (e.g. a single node starting
44 | # at node0); "|| true" keeps "set -e" from aborting the script in that case.
45 | last=`expr $first + $1 - 1` || true
46 | 
47 | # Copy startup files, scripts, the module, and utilities to each node, then
48 | # run "config default" there.
49 | for ((i = $first ; i <= $last; i++)); do
50 |     node=node$i
51 |     echo
52 |     echo '*** Installing Homa on' $node '***'
53 |     rsync --ipv4 -e "ssh -4 -o StrictHostKeyChecking=no" -rtv ~/.bashrc ~/.bash_profile ~/.gdbinit $node:
54 |     rsync --ipv4 -e "ssh -4 -o StrictHostKeyChecking=no" -rtv --exclude __pycache__ ~/bin/ $node:bin/
55 |     rsync --ipv4 -e "ssh -4 -o StrictHostKeyChecking=no" -rtv $homa_ko $root/util/cp_node $root/util/homa_prio $root/util/*.py $node:bin/
56 |     ssh -4 $node 'sudo sysctl .kernel.printk="5 4 1 7"'
57 |     ssh -4 $node 'echo $PATH'
58 |     ssh -4 $node 'config default'
59 | done


--------------------------------------------------------------------------------
/cloudlab/bin/on_nodes:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (c) 2020-2023 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # This uses ssh to run a given command on one or more nodes in the
 7 | # cluster.
 8 | #
 9 | # Usage:
10 | # on_nodes first last cmd arg arg ...
11 | #
12 | # The "first" and "last" arguments give the range of nodes (inclusive) on
13 | # which the command should run. The remaining arguments are a command
14 | # and its arguments to run on the given machines
15 | 
16 | if [ $# -lt 3 ]; then
17 |     echo "Usage: on_nodes first last cmd arg arg ..."
18 |     exit 1
19 | fi
20 | first=$1
21 | last=$2
22 | # Drop the node range so that "$@" holds only the command and its arguments.
23 | shift 2
24 | 
25 | for ((i = $first ; i <= $last; i++)); do
26 |     node=node$i
27 |     echo ""
28 |     echo $node:
29 |     # "$@" is quoted so that the local shell does not glob-expand or
30 |     # re-split the command words; ssh joins them with spaces and the
31 |     # shell on the remote node interprets the result.
32 |     ssh $node "$@"
33 | done


--------------------------------------------------------------------------------
/cloudlab/bin/set_cutoffs:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (c) 2020-2023 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # This script sets the cutoffs for unscheduled priorities on one or more
 7 | # nodes to match the characteristics of the Homa workloads.
 8 | #
 9 | # Usage:
10 | # set_cutoffs workload num_nodes [first]
11 | #
12 | # The "workload" argument must be one of w1-w5; the cutoffs will be set
13 | # to values appropriate for that workload. The "num_nodes" argument indicates
14 | # how many servers should be updated. The "first" argument is optional; it
15 | # is an integer identifying the first node on which cutoffs will be set
16 | # (e.g. "set_cutoffs w3 4 2" means node2 through node5 will be updated).
17 | # "first" defaults to 0.
18 | 
19 | root=~/homaModule
20 | 
21 | if [ $# -eq 3 ]; then
22 |     first=$3
23 | elif [ $# -eq 2 ]; then
24 |     first=0
25 | else
26 |     echo "Usage: set_cutoffs workload num_nodes [first]"
27 |     exit 1
28 | fi
29 | last=`expr $first + $2 - 1`
30 | 
31 | # Select the cutoff list for the workload. A case statement matches "$1" as
32 | # a single word, so an empty or multi-word argument goes straight to the
33 | # error branch instead of producing errors from "[".
34 | case "$1" in
35 |     w1) cutoffs="1000000 12288 2112 1280 832 576 384 192" ;;
36 |     w2) cutoffs="1000000 1000000 1000000 7168 1920 640 448 320" ;;
37 |     w3) cutoffs="1000000 1000000 1000000 1000000 1000000 63488 12288 3008" ;;
38 |     w4) cutoffs="1000000 1000000 1000000 1000000 1000000 1000000 1000000 68608" ;;
39 |     w5) cutoffs="1000000 1000000 1000000 1000000 1000000 1000000 1000000 1000000" ;;
40 |     *)
41 |         echo "Unknown workload '$1'; must be w1-w5"
42 |         exit 1
43 |         ;;
44 | esac
45 | 
46 | for ((i = $first ; i <= $last; i++)); do
47 |     node=node$i
48 |     echo '*** Updating cutoffs on' $node '***'
49 |     # The escaped quotes reach the remote shell, so the whole list is
50 |     # passed to sysctl as a single value.
51 |     ssh $node sudo sysctl /net/homa/unsched_cutoffs=\"$cutoffs\"
52 | done


--------------------------------------------------------------------------------
/cloudlab/bin/update_linux:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (c) 2020-2023 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # This script uses files on the current machine to update the kernel on one
 7 | # or more other machines and reboot those machines.
 8 | #
 9 | # Usage:
10 | # update_linux num_nodes [first]
11 | #
12 | # The "num_nodes" argument indicates how many nodes the command should
13 | # be run on (starting at node1). The "first" argument is optional; it is
14 | # an integer identifying the first node on which installation will occur
15 | # (e.g. "update_linux 4 2" means node2 through node5 will be updated).
16 | # "first" defaults to 1.
17 | 
18 | v=`uname -r`
19 | #v=5.17.7+
20 | 
21 | if [ $# -eq 2 ]; then
22 |     first=$2
23 | elif [ $# -eq 1 ]; then
24 |     first=1
25 | else
26 |     echo "Usage: update_linux num_nodes [first]"
27 |     exit 1
28 | fi
29 | last=`expr $first + $1 - 1`
30 | 
31 | for ((i = $first ; i <= $last; i++)); do
32 |     node=node$i
33 |     echo
34 |     echo $node
35 |     ssh $node 'rm -rf tmp; mkdir -p tmp tmp/boot'
36 |     rsync -rtv /boot/initrd.img-$v /boot/config-$v /boot/System.map-$v \
37 |             /boot/vmlinuz-$v $node:tmp/boot/
38 |     ssh $node "sudo cp -f tmp/boot/* /boot; sudo reboot"
39 | done
40 | 


--------------------------------------------------------------------------------
/cloudlab/config_switch:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2020-2023 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # This program outputs the commands needed to configure a CloudLab switch
 7 | # for Homa, or to reset it.
 8 | # Usage:
 9 | # config_switch [reset]
10 | #
11 | # Commands will be printed on stdout.
12 | 
13 | import sys
14 | 
15 | # Ports to configure
16 | nodes = [81, 82, 83, 84, 85, 87, 88, 89, 91, 93, 94, 95, 96, 98, 100, 102,
17 |          104, 105, 106, 107, 108, 110, 111, 112, 114, 116, 117, 118, 120]
18 | ports = []
19 | prev_switch = -1
20 | for node in nodes:
21 |     switch = (node-1)//40
22 |     if (switch != prev_switch) and (prev_switch >= 0):
23 |         print("Multiple switches: rack-%d and rack-%d" % (switch, prev_switch),
24 |                 file=sys.stderr)
25 |     prev_switch = switch
26 |     ports.append(node - 40*switch)
27 | ports.sort()
28 | # ports = range(1, 41)
29 | 
30 | def config():
31 |     # First, must enter "configure" mode
32 |     print("enable")
33 |     print("configure terminal")
34 | 
35 |     for port in ports:
36 |         # Configure priorities for Homa.
37 |         print("interface ethernet 1/%d qos trust both" % (port))
38 |         for tc in range(8):
39 |             print("interface ethernet 1/%d traffic-class %d dcb ets strict" %
40 |                     (port, tc))
41 | 
42 |         # Enable large packets
43 |         print("interface ethernet 1/%d mtu 9216 force" % (port))
44 | 
45 |         # Set DCTCP marking thresholds:
46 |         print("interface ethernet 1/%d traffic-class 0 congestion-control ecn "
47 |                 "minimum-absolute 70 maximum-absolute 70" % (port))
48 |         print("interface ethernet 1/%d traffic-class 1 congestion-control ecn "
49 |                 "minimum-absolute 70 maximum-absolute 70" % (port))
50 | 
51 | def reset():
52 |     # First, must enter "configure" mode
53 |     print("enable")
54 |     print("configure terminal")
55 | 
56 |     for port in ports:
57 |         # Restore QOS priorities.
58 |         print("interface ethernet 1/%d no qos trust" % (port))
59 |         for tc in range(8):
60 |             print("interface ethernet 1/%d traffic-class %d no dcb ets" %
61 |                     (port, tc))
62 | 
63 |         # Disable large packets
64 |         print("interface ethernet 1/%d mtu 1500 force" % (port))
65 | 
66 |         # Reset DCTCP marking thresholds:
67 |         print("interface ethernet 1/%d no traffic-class 0 congestion-control"
68 |                 % (port))
69 |         print("interface ethernet 1/%d no traffic-class 1 congestion-control"
70 |                 % (port))
71 | 
72 | if len(sys.argv) > 1:
73 |     if sys.argv[1] == "reset":
74 |         reset()
75 |     else:
76 |         print("Usage: config_switch [reset]", file=sys.stderr)
77 | else:
78 |     config()


--------------------------------------------------------------------------------
/cloudlab/gdbinit:
--------------------------------------------------------------------------------
1 | set style address foreground green
2 | 


--------------------------------------------------------------------------------
/cloudlab/update:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Copyright (c) 2019-2020 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # This script copies modified information from this directory to the
 7 | # CloudLab machines given by the arguments (defaults are provided if no
 8 | # arguments).
 9 | 
10 | # With no arguments, the list of targets is read from ~/.cloudlabNode.
11 | if [ $# -gt 0 ]; then
12 |     targets=$*
13 | else
14 |     targets=$(cat $HOME/.cloudlabNode)
15 | fi
16 | 
17 | # First pass: copy this directory tree to homaModule/ on every target.
18 | for t in $targets; do
19 |     echo $t
20 |     rsync -rtv --exclude-from=rsync-exclude.txt ./ ouster@$t:homaModule/
21 | done
22 | 
23 | # Second pass: copy scripts, shell startup files, and ssh keys, and use the
24 | # CloudLab key as id_rsa on targets that do not have one yet.
25 | for t in $targets; do
26 |     echo $t
27 |     rsync -rtv cloudlab/bin/ ouster@$t:bin/
28 |     rsync -rtv cloudlab/bash_profile ouster@$t:.bash_profile
29 |     rsync -rtv cloudlab/bashrc ouster@$t:.bashrc
30 |     rsync -rtv cloudlab/gdbinit ouster@$t:.gdbinit
31 |     rsync -rtv ~/.ssh/cloudlab_rsa.pub ~/.ssh/cloudlab_rsa $t:.ssh/
32 |     ssh $t 'if [ ! -e .ssh/id_rsa ]; then
33 |         cp .ssh/cloudlab_rsa .ssh/id_rsa
34 |         fi'
35 | done
36 | 


--------------------------------------------------------------------------------
/dissector/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # CMakeLists.txt
 2 | #
 3 | # Copyright 2023 Missing Link Electronics Inc,
 4 | # Björn Petersen <bjoern.petersen@missinglinkelectronics.com>
 5 | #
 6 | # This code is dual licensed under one of the following 2 licenses:
 7 | #
 8 | #                          ################
 9 | #                          # GPL2 License #
10 | #                          ################
11 | #
12 | # This program is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU General Public License
14 | # as published by the Free Software Foundation; either version 2
15 | # of the License, or (at your option) any later version.
16 | #
17 | # This program is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 | # GNU General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU General Public License
23 | # along with this program; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
25 | #
26 | #
27 | #                          ################
28 | #                          # HOMA License #
29 | #                          ################
30 | #
31 | # Permission to use, copy, modify, and/or distribute this software for any
32 | # purpose with or without fee is hereby granted, provided that the above
33 | # copyright notice and this permission notice appear in all copies.
34 | #
35 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
36 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
37 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
38 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
39 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
40 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
41 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
42 | 
43 | # project(... DESCRIPTION ...) below requires CMake 3.9, so declare that as
44 | # the minimum instead of 3.1.
45 | cmake_minimum_required(VERSION 3.9)
46 | 
47 | project(HomaDissector VERSION 0.0.1 DESCRIPTION "Wireshark Homa Plugin" LANGUAGES C)
48 | option(INSTALL_PLUGIN_LOCAL "Install the homa dissector plugin inside the local folder of wireshark" ON)
49 | 
50 | find_package(Wireshark CONFIG REQUIRED)
51 | 
52 | if (NOT Wireshark_PLUGINS_ENABLED)
53 |     message(FATAL_ERROR "Wireshark was compiled without support for plugins")
54 | endif ()
55 | 
56 | # Hide symbols by default.
57 | set(CMAKE_C_VISIBILITY_PRESET hidden)
58 | # Warning flags are prepended so that user-supplied CMAKE_C_FLAGS come last.
59 | if (CMAKE_COMPILER_IS_GNUCC)
60 |     set(CMAKE_C_FLAGS "-Wall -Wextra ${CMAKE_C_FLAGS}")
61 | endif ()
62 | 
63 | add_definitions(-DVERSION=\"${PROJECT_VERSION}\")
64 | 
65 | add_library(${PROJECT_NAME} MODULE homa.c)
66 | set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" DEFINE_SYMBOL "")
67 | target_link_libraries(${PROJECT_NAME} epan)
68 | 
69 | # Test the variable by name: if (${VAR}) is a syntax error when VAR is empty.
70 | if (INSTALL_PLUGIN_LOCAL)
71 |     install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION "$ENV{HOME}/.local/lib/wireshark/plugins/${Wireshark_MAJOR_VERSION}.${Wireshark_MINOR_VERSION}/epan" NAMELINK_SKIP)
72 | else ()
73 |     install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION "${Wireshark_PLUGIN_INSTALL_DIR}/epan" NAMELINK_SKIP)
74 | endif ()
75 | 


--------------------------------------------------------------------------------
/dissector/README.md:
--------------------------------------------------------------------------------
 1 | # HOMA Dissector
 2 | 
 3 | A dissector for viewing [homa](https://homa-transport.atlassian.net/wiki/spaces/HOMA/overview) packets. The dissector
 4 | was tested on Ubuntu 18.04 and Ubuntu 22.04 with Wireshark version 3.6.2.
 5 | 
 6 | ## Prerequisites
 7 | 
 8 | The dissector is a cmake based wireshark plugin. For building please make sure that the required wireshark dependencies,
 9 | including wireshark headers, are installed. For Debian based systems the following command line may be
10 | used: `apt install wireshark-dev wireshark-common`
11 | 
12 | ## Installation
13 | 
14 | The Plugin can be installed with the following steps.
15 | 
16 | ```shell
17 | cmake .
18 | make
19 | make install
20 | ```
21 | 
22 | By default, the plugin will be installed inside the local plugin folder of Wireshark. To install the plugin globally
23 | on your system, run the following commands:
24 | 
25 | ```shell
26 | cmake -DINSTALL_PLUGIN_LOCAL=OFF .
27 | make
28 | sudo make install
29 | ```
30 | 


--------------------------------------------------------------------------------
/homa.h:
--------------------------------------------------------------------------------
  1 | /* SPDX-License-Identifier: BSD-2-Clause */
  2 | 
  3 | /* This file defines the kernel call interface for the Homa
  4 |  * transport protocol.
  5 |  */
  6 | 
  7 | #ifndef _UAPI_LINUX_HOMA_H
  8 | #define _UAPI_LINUX_HOMA_H
  9 | 
 10 | #include <linux/types.h>
 11 | #ifndef __KERNEL__
 12 | #include <netinet/in.h>
 13 | #include <sys/socket.h>
 14 | #endif
 15 | 
 16 | /* IANA-assigned Internet Protocol number for Homa. */
 17 | #define IPPROTO_HOMA 146
 18 | 
 19 | /**
 20 |  * define HOMA_MAX_MESSAGE_LENGTH - Maximum bytes of payload in a Homa
 21 |  * request or response message.
 22 |  */
 23 | #define HOMA_MAX_MESSAGE_LENGTH 1000000
 24 | 
 25 | /**
 26 |  * define HOMA_BPAGE_SIZE - Number of bytes in pages used for receive
 27 |  * buffers. Must be power of two.
 28 |  */
 29 | #define HOMA_BPAGE_SIZE (1 << HOMA_BPAGE_SHIFT)
 30 | #define HOMA_BPAGE_SHIFT 16
 31 | 
 32 | /**
 33 |  * define HOMA_MAX_BPAGES - The largest number of bpages that will be required
 34 |  * to store an incoming message.
 35 |  */
 36 | #define HOMA_MAX_BPAGES ((HOMA_MAX_MESSAGE_LENGTH + HOMA_BPAGE_SIZE - 1) \
 37 | 		>> HOMA_BPAGE_SHIFT)
 38 | 
 39 | /**
 40 |  * define HOMA_MIN_DEFAULT_PORT - The 16 bit port space is divided into
 41 |  * two nonoverlapping regions. Ports 1-32767 are reserved exclusively
 42 |  * for well-defined server ports. The remaining ports are used for client
 43 |  * ports; these are allocated automatically by Homa. Port 0 is reserved.
 44 |  */
 45 | #define HOMA_MIN_DEFAULT_PORT 0x8000
 46 | 
 47 | /**
 48 |  * struct homa_sendmsg_args - Provides information needed by Homa's
 49 |  * sendmsg; passed to sendmsg using the msg_control field.
 50 |  */
 51 | struct homa_sendmsg_args {
 52 | 	/**
 53 | 	 * @id: (in/out) An initial value of 0 means a new request is
 54 | 	 * being sent; nonzero means the message is a reply to the given
 55 | 	 * id. If the message is a request, then the value is modified to
 56 | 	 * hold the id of the new RPC.
 57 | 	 */
 58 | 	__u64 id;
 59 | 
 60 | 	/**
 61 | 	 * @completion_cookie: (in) Used only for request messages; will be
 62 | 	 * returned by recvmsg when the RPC completes. Typically used to
 63 | 	 * locate app-specific info about the RPC.
 64 | 	 */
 65 | 	__u64 completion_cookie;
 66 | };
 67 | 
 68 | #if !defined(__cplusplus)
 69 | _Static_assert(sizeof(struct homa_sendmsg_args) >= 16,
 70 | 	       "homa_sendmsg_args shrunk");
 71 | _Static_assert(sizeof(struct homa_sendmsg_args) <= 16,
 72 | 	       "homa_sendmsg_args grew");
 73 | #endif
 74 | 
 75 | /**
 76 |  * struct homa_recvmsg_args - Provides information needed by Homa's
 77 |  * recvmsg; passed to recvmsg using the msg_control field.
 78 |  */
 79 | struct homa_recvmsg_args {
 80 | 	/**
 81 | 	 * @id: (in/out) Initially specifies the id of the desired RPC, or 0
 82 | 	 * if any RPC is OK; returns the actual id received.
 83 | 	 */
 84 | 	__u64 id;
 85 | 
 86 | 	/**
 87 | 	 * @completion_cookie: (out) If the incoming message is a response,
 88 | 	 * this will return the completion cookie specified when the
 89 | 	 * request was sent. For requests this will always be zero.
 90 | 	 */
 91 | 	__u64 completion_cookie;
 92 | 
 93 | 	/**
 94 | 	 * @flags: (in) OR-ed combination of bits that control the operation.
 95 | 	 * See below for values.
 96 | 	 */
 97 | 	__u32 flags;
 98 | 
 99 | 	/**
100 | 	 * @num_bpages: (in/out) Number of valid entries in @bpage_offsets.
101 | 	 * Passes in bpages from previous messages that can now be
102 | 	 * recycled; returns bpages from the new message.
103 | 	 */
104 | 	__u32 num_bpages;
105 | 
106 | 	/**
107 | 	 * @bpage_offsets: (in/out) Each entry is an offset into the buffer
108 | 	 * region for the socket pool. When returned from recvmsg, the
109 | 	 * offsets indicate where fragments of the new message are stored. All
110 | 	 * entries but the last refer to full buffer pages (HOMA_BPAGE_SIZE
111 | 	 * bytes) and are bpage-aligned. The last entry may refer to a bpage
112 | 	 * fragment and is not necessarily aligned. The application now owns
113 | 	 * these bpages and must eventually return them to Homa, using
114 | 	 * bpage_offsets in a future recvmsg invocation.
115 | 	 */
116 | 	__u32 bpage_offsets[HOMA_MAX_BPAGES];
117 | };
118 | 
119 | #if !defined(__cplusplus)
120 | _Static_assert(sizeof(struct homa_recvmsg_args) >= 88,
121 | 	       "homa_recvmsg_args shrunk");
122 | _Static_assert(sizeof(struct homa_recvmsg_args) <= 88,
123 | 	       "homa_recvmsg_args grew");
124 | #endif
125 | 
126 | /* Flag bits for homa_recvmsg_args.flags (see man page for documentation):
127 |  */
128 | #define HOMA_RECVMSG_REQUEST       0x01
129 | #define HOMA_RECVMSG_RESPONSE      0x02
130 | #define HOMA_RECVMSG_NONBLOCKING   0x04
131 | #define HOMA_RECVMSG_VALID_FLAGS   0x07
132 | 
133 | /**
134 |  * struct homa_abort_args - Structure that passes arguments and results
135 |  * between user space and the HOMAIOCABORT ioctl.
136 |  */
137 | struct homa_abort_args {
138 | 	/** @id: Id of RPC to abort, or zero to abort all RPCs on socket. */
139 | 	__u64 id;
140 | 
141 | 	/**
142 | 	 * @error: Zero means destroy and free RPCs; nonzero means complete
143 | 	 * them with this error (recvmsg will return the RPCs).
144 | 	 */
145 | 	int error;
146 | 	/* Padding: @_pad1 and @_pad2 bring the struct to exactly 32 bytes. */
147 | 	int _pad1;
148 | 	__u64 _pad2[2];
149 | };
150 | 
151 | #if !defined(__cplusplus)
152 | _Static_assert(sizeof(struct homa_abort_args) >= 32, "homa_abort_args shrunk");
153 | _Static_assert(sizeof(struct homa_abort_args) <= 32, "homa_abort_args grew");
154 | #endif
155 | 
156 | /** define SO_HOMA_RCVBUF: setsockopt option for specifying buffer region. */
157 | #define SO_HOMA_RCVBUF 10
158 | 
159 | /** struct homa_rcvbuf_args - setsockopt argument for SO_HOMA_RCVBUF. */
160 | struct homa_rcvbuf_args {
161 | 	/** @start: Address of first byte of buffer region in user space. */
162 | 	__u64 start;
163 | 
164 | 	/** @length: Total number of bytes available at @start. */
165 | 	size_t length;
166 | };
167 | 
168 | /* Meanings of the bits in Homa's flag word, which can be set using
169 |  * "sysctl /net/homa/flags".
170 |  */
171 | 
172 | /**
173 |  * define HOMA_FLAG_DONT_THROTTLE - disable the output throttling mechanism
174 |  * (always send all packets immediately).
175 |  */
176 | #define HOMA_FLAG_DONT_THROTTLE   2
177 | 
178 | /**
179 |  * I/O control calls on Homa sockets. These are mapped into the
180 |  * SIOCPROTOPRIVATE range of 0x89e0 through 0x89ef.
181 |  */
182 | 
183 | #define HOMAIOCABORT  _IOWR(0x89, 0xe3, struct homa_abort_args)
184 | #define HOMAIOCFREEZE _IO(0x89, 0xef)
185 | 
186 | #ifndef __STRIP__ /* See strip.py */
187 | int     homa_abort(int sockfd, __u64 id, int error);
188 | int     homa_send(int sockfd, const void *message_buf,
189 | 		  size_t length, const struct sockaddr *dest_addr,
190 | 		  __u32 addrlen,  __u64 *id, __u64 completion_cookie);
191 | int     homa_sendv(int sockfd, const struct iovec *iov,
192 | 		   int iovcnt, const struct sockaddr *dest_addr,
193 | 		   __u32 addrlen,  __u64 *id, __u64 completion_cookie);
194 | ssize_t homa_reply(int sockfd, const void *message_buf,
195 | 		   size_t length, const struct sockaddr *dest_addr,
196 | 		   __u32 addrlen,  __u64 id);
197 | ssize_t homa_replyv(int sockfd, const struct iovec *iov,
198 | 		    int iovcnt, const struct sockaddr *dest_addr,
199 | 		    __u32 addrlen,  __u64 id);
200 | #endif /* See strip.py */
201 | 
202 | #endif /* _UAPI_LINUX_HOMA_H */
203 | 


--------------------------------------------------------------------------------
/homa_grant.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-License-Identifier: BSD-2-Clause */
 2 | 
 3 | /* This file contains definitions that related to generating grants. */
 4 | 
 5 | #ifndef _HOMA_GRANT_H
 6 | #define _HOMA_GRANT_H
 7 | 
 8 | int      homa_grantable_lock_slow(struct homa *homa, int recalc);
 9 | void     homa_grant_add_rpc(struct homa_rpc *rpc);
10 | void     homa_grant_check_rpc(struct homa_rpc *rpc);
11 | void     homa_grant_find_oldest(struct homa *homa);
12 | void     homa_grant_free_rpc(struct homa_rpc *rpc);
13 | void     homa_grant_log_tt(struct homa *homa);
14 | int      homa_grant_outranks(struct homa_rpc *rpc1,
15 | 			     struct homa_rpc *rpc2);
16 | int      homa_grant_pick_rpcs(struct homa *homa, struct homa_rpc **rpcs,
17 | 			      int max_rpcs);
18 | void     homa_grant_pkt(struct sk_buff *skb, struct homa_rpc *rpc);
19 | void     homa_grant_recalc(struct homa *homa, int locked);
20 | void     homa_grant_remove_rpc(struct homa_rpc *rpc);
21 | int      homa_grant_send(struct homa_rpc *rpc, struct homa *homa);
22 | int      homa_grant_update_incoming(struct homa_rpc *rpc,
23 | 				    struct homa *homa);
24 | 
25 | /**
26 |  * homa_grantable_lock() - Acquire the grantable lock. If the lock
27 |  * isn't immediately available, record stats on the waiting time.
28 |  * @homa:    Overall data about the Homa protocol implementation.
29 |  * @recalc:  Nonzero means the caller is homa_grant_recalc; if another thread
30 |  *           is already recalculating, can return without waiting for the lock.
31 |  * Return:   Nonzero means this thread now owns the grantable lock. Zero
32 |  *           means the lock was not acquired and there is no need for this
33 |  *           thread to do the work of homa_grant_recalc because some other
34 |  *           thread started a fresh calculation after this method was invoked.
35 |  */
36 | static inline int homa_grantable_lock(struct homa *homa, int recalc)
37 | 	__acquires(&homa->grantable_lock)
38 | {
39 | 	int result = 1;
40 | 
41 | 	if (!spin_trylock_bh(&homa->grantable_lock))
42 | 		result = homa_grantable_lock_slow(homa, recalc);
43 | 	/* Only the thread that owns the lock may restart the hold timer. */
44 | 	if (result)
45 | 		homa->grantable_lock_time = sched_clock();
46 | 	return result;
47 | }
48 | 
49 | /**
50 |  * homa_grantable_unlock() - Record hold time and release the grantable lock.
51 |  * @homa:    Overall data about the Homa protocol implementation.
52 |  */
53 | static inline void homa_grantable_unlock(struct homa *homa)
54 | 	__releases(&homa->grantable_lock)
55 | {
56 | 	INC_METRIC(grantable_lock_ns, sched_clock() -
57 | 		   homa->grantable_lock_time);
58 | 	spin_unlock_bh(&homa->grantable_lock);
59 | }
60 | 
61 | #endif /* _HOMA_GRANT_H */
62 | 


--------------------------------------------------------------------------------
/homa_offload.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-License-Identifier: BSD-2-Clause */
 2 | 
 3 | /* This file contains definitions related to homa_offload.c. */
 4 | 
 5 | #ifndef _HOMA_OFFLOAD_H
 6 | #define _HOMA_OFFLOAD_H
 7 | 
 8 | #include <linux/types.h>
 9 | 
10 | /**
11 |  * struct homa_offload_core - Stores core-specific information used during
12 |  * GRO operations.
13 |  */
14 | struct homa_offload_core {
15 | 	/**
16 | 	 * @last_active: the last time (in sched_clock() units) that
17 | 	 * there was system activity, such as NAPI or SoftIRQ, on this
18 | 	 * core. Used for load balancing.
19 | 	 */
20 | 	__u64 last_active;
21 | 
22 | 	/**
23 | 	 * @last_gro: the last time (in sched_clock() units) that
24 | 	 * homa_gro_receive returned on this core. Used to determine
25 | 	 * whether GRO is keeping a core busy.
26 | 	 */
27 | 	__u64 last_gro;
28 | 
29 | 	/**
30 | 	 * @softirq_backlog: the number of batches of packets that have
31 | 	 * been queued for SoftIRQ processing on this core but haven't
32 | 	 * yet been processed.
33 | 	 */
34 | 	atomic_t softirq_backlog;
35 | 
36 | 	/**
37 | 	 * @softirq_offset: used when rotating SoftIRQ assignment among
38 | 	 * the next cores; contains an offset to add to the current core
39 | 	 * to produce the core for SoftIRQ.
40 | 	 */
41 | 	int softirq_offset;
42 | 
43 | 	/**
44 | 	 * @gen3_softirq_cores: when the Gen3 load balancer is in use,
45 | 	 * GRO will arrange for SoftIRQ processing to occur on one of
46 | 	 * these cores; -1 values are ignored (see balance.txt for more
47 | 	 * on load balancing). This information is filled in via sysctl.
48 | 	 */
49 | #define NUM_GEN3_SOFTIRQ_CORES 3
50 | 	int gen3_softirq_cores[NUM_GEN3_SOFTIRQ_CORES];
51 | 
52 | 	/**
53 | 	 * @last_app_active: the most recent time (sched_clock() units)
54 | 	 * when an application was actively using Homa on this core (e.g.,
55 | 	 * by sending or receiving messages). Used for load balancing
56 | 	 * (see balance.txt).
57 | 	 */
58 | 	__u64 last_app_active;
59 | 
60 | 	/**
61 | 	 * @held_skb: last packet buffer known to be available for
62 | 	 * merging other packets into on this core (note: may not still
63 | 	 * be available), or NULL if none.
64 | 	 */
65 | 	struct sk_buff *held_skb;
66 | 
67 | 	/**
68 | 	 * @held_bucket: the index, within napi->gro_hash, of the list
69 | 	 * containing @held_skb; undefined if @held_skb is NULL. Used to
70 | 	 * verify that @held_skb is still available.
71 | 	 */
72 | 	int held_bucket;
73 | };
74 | DECLARE_PER_CPU(struct homa_offload_core, homa_offload_core);
75 | 
76 | int      homa_gro_complete(struct sk_buff *skb, int thoff);
77 | void     homa_gro_gen2(struct homa *homa, struct sk_buff *skb);
78 | void     homa_gro_gen3(struct homa *homa, struct sk_buff *skb);
79 | void     homa_gro_hook_tcp(void);
80 | void     homa_gro_unhook_tcp(void);
81 | struct sk_buff *homa_gro_receive(struct list_head *gro_list,
82 | 				 struct sk_buff *skb);
83 | struct sk_buff *homa_gso_segment(struct sk_buff *skb,
84 | 				 netdev_features_t features);
85 | int      homa_offload_end(void);
86 | int      homa_offload_init(void);
87 | void     homa_send_ipis(void);
88 | struct sk_buff *homa_tcp_gro_receive(struct list_head *held_list,
89 | 				     struct sk_buff *skb);
90 | 
91 | #endif /* _HOMA_OFFLOAD_H */
92 | 


--------------------------------------------------------------------------------
/homa_pool.h:
--------------------------------------------------------------------------------
  1 | /* SPDX-License-Identifier: BSD-2-Clause */
  2 | 
  3 | /* This file contains definitions used to manage user-space buffer pools.
  4 |  */
  5 | 
  6 | #ifndef _HOMA_POOL_H
  7 | #define _HOMA_POOL_H
  8 | 
  9 | #include "homa_rpc.h"
 10 | 
 11 | /**
 12 |  * struct homa_bpage - Contains information about a single page in
 13 |  * a buffer pool.
 14 |  */
 15 | struct homa_bpage {
 16 | 	union {
 17 | 		/**
 18 | 		 * @cache_line: Ensures that each homa_bpage object
 19 | 		 * is exactly one cache line long.
 20 | 		 */
 21 | 		char cache_line[L1_CACHE_BYTES];
 22 | 		struct {
 23 | 			/** @lock: to synchronize shared access. */
 24 | 			spinlock_t lock;
 25 | 
 26 | 			/**
 27 | 			 * @refs: Counts number of distinct uses of this
 28 | 			 * bpage (1 tick for each message that is using
 29 | 			 * this page, plus an additional tick if the @owner
 30 | 			 * field is set).
 31 | 			 */
 32 | 			atomic_t refs;
 33 | 
 34 | 			/**
 35 | 			 * @owner: kernel core that currently owns this page
 36 | 			 * (< 0 if none).
 37 | 			 */
 38 | 			int owner;
 39 | 
 40 | 			/**
 41 | 			 * @expiration: time (in sched_clock() units) after
 42 | 			 * which it's OK to steal this page from its current
 43 | 			 * owner (if @refs is 1).
 44 | 			 */
 45 | 			__u64 expiration;
 46 | 		};
 47 | 	};
 48 | };
 49 | 
 50 | #ifndef __STRIP__ /* See strip.py */
 51 | _Static_assert(sizeof(struct homa_bpage) == L1_CACHE_BYTES,
 52 | 	       "homa_bpage overflowed a cache line");
 53 | #endif /* See strip.py */
 54 | 
 55 | /**
 56 |  * struct homa_pool_core - Holds core-specific data for a homa_pool (a bpage
 57 |  * out of which that core is allocating small chunks).
 58 |  */
 59 | struct homa_pool_core {
 60 | 	union {
 61 | 		/**
 62 | 		 * @cache_line: Ensures that each object is exactly one
 63 | 		 * cache line long.
 64 | 		 */
 65 | 		char cache_line[L1_CACHE_BYTES];
 66 | 		struct {
 67 | 			/**
 68 | 			 * @page_hint: Index of bpage in pool->descriptors,
 69 | 			 * which may be owned by this core. If so, we'll use it
 70 | 			 * for allocating partial pages.
 71 | 			 */
 72 | 			int page_hint;
 73 | 
 74 | 			/**
 75 | 			 * @allocated: if the page given by @page_hint is
 76 | 			 * owned by this core, this variable gives the number of
 77 | 			 * (initial) bytes that have already been allocated
 78 | 			 * from the page.
 79 | 			 */
 80 | 			int allocated;
 81 | 
 82 | 			/**
 83 | 			 * @next_candidate: when searching for free bpages,
 84 | 			 * check this index next.
 85 | 			 */
 86 | 			int next_candidate;
 87 | 		};
 88 | 	};
 89 | };
 90 | 
 91 | #ifndef __STRIP__ /* See strip.py */
 92 | _Static_assert(sizeof(struct homa_pool_core) == L1_CACHE_BYTES,
 93 | 	       "homa_pool_core overflowed a cache line");
 94 | #endif /* See strip.py */
 95 | 
 96 | /**
 97 |  * struct homa_pool - Describes a pool of buffer space for incoming
 98 |  * messages for a particular socket; managed by homa_pool.c. The pool is
 99 |  * divided up into "bpages", which are a multiple of the hardware page size.
100 |  * A bpage may be owned by a particular core so that it can more efficiently
101 |  * allocate space for small messages.
102 |  */
103 | struct homa_pool {
104 | 	/**
105 | 	 * @hsk: the socket that this pool belongs to.
106 | 	 */
107 | 	struct homa_sock *hsk;
108 | 
109 | 	/**
110 | 	 * @region: beginning of the pool's region (in the app's virtual
111 | 	 * memory). Divided into bpages. 0 means the pool hasn't yet been
112 | 	 * initialized.
113 | 	 */
114 | 	char __user *region;
115 | 
116 | 	/** @num_bpages: total number of bpages in the pool. */
117 | 	int num_bpages;
118 | 
119 | 	/** @descriptors: kmalloced area containing one entry for each bpage. */
120 | 	struct homa_bpage *descriptors;
121 | 
122 | 	/**
123 | 	 * @free_bpages: the number of pages still available for allocation
124 | 	 * by homa_pool_get_pages. This equals the number of pages with zero
125 | 	 * reference counts, minus the number of pages that have been claimed
126 | 	 * by homa_pool_get_pages but not yet allocated.
127 | 	 */
128 | 	atomic_t free_bpages;
129 | 
130 | 	/**
131 | 	 * @bpages_needed: the number of free bpages required to satisfy the
132 | 	 * needs of the first RPC on @hsk->waiting_for_bufs, or INT_MAX if
133 | 	 * that queue is empty.
134 | 	 */
135 | 	int bpages_needed;
136 | 
137 | 	/** @cores: core-specific info; dynamically allocated. */
138 | 	struct homa_pool_core *cores;
139 | 
140 | 	/** @num_cores: number of elements in @cores. */
141 | 	int num_cores;
142 | 
143 | 	/**
144 | 	 * @check_waiting_invoked: incremented during unit tests when
145 | 	 * homa_pool_check_waiting is invoked.
146 | 	 */
147 | 	int check_waiting_invoked;
148 | };
149 | 
150 | int      homa_pool_allocate(struct homa_rpc *rpc);
151 | void     homa_pool_check_waiting(struct homa_pool *pool);
152 | void     homa_pool_destroy(struct homa_pool *pool);
153 | void __user *homa_pool_get_buffer(struct homa_rpc *rpc, int offset,
154 | 				  int *available);
155 | int      homa_pool_get_pages(struct homa_pool *pool, int num_pages,
156 | 			     __u32 *pages, int leave_locked);
157 | void     homa_pool_get_rcvbuf(struct homa_sock *hsk,
158 | 			      struct homa_rcvbuf_args *args);
159 | int      homa_pool_init(struct homa_sock *hsk, void *buf_region,
160 | 			__u64 region_size);
161 | int      homa_pool_release_buffers(struct homa_pool *pool,
162 | 				   int num_buffers, __u32 *buffers);
163 | 
164 | #endif /* _HOMA_POOL_H */
165 | 


--------------------------------------------------------------------------------
/homa_receiver.cc:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2022 Homa Developers
  2 |  * SPDX-License-Identifier: BSD-1-Clause
  3 |  */
  4 | 
  5 | #include <string.h>
  6 | 
  7 | #include "homa_receiver.h"
  8 | 
  9 | /**
 10 |  * homa::receiver::receiver() - Construct a receiver for a Homa socket.
 11 |  * @fd:         Homa socket from which this object will receive incoming
 12 |  *              messages. Buffering must already have been set up on the
 13 |  *              socket by the caller (setsockopt with the SO_HOMA_RCVBUF
 14 |  *              option), and the file descriptor must remain valid for as
 15 |  *              long as this object exists.
 16 |  * @buf_region: Location of the buffer region that was allocated for
 17 |  *              this socket.
 18 |  */
 19 | homa::receiver::receiver(int fd, void *buf_region)
 20 | 	: fd(fd)
 21 | 	, hdr()
 22 | 	, control()
 23 | 	, source()
 24 | 	, msg_length(-1)
 25 | 	, buf_region(static_cast<char *>(buf_region))
 26 | {
 27 | 	memset(&control, 0, sizeof(control));
 28 | 	memset(&hdr, 0, sizeof(hdr));
 29 | 
 30 | 	/* Incoming control info and sender address land in this object. */
 31 | 	hdr.msg_control = &control;
 32 | 	hdr.msg_controllen = sizeof(control);
 33 | 	hdr.msg_name = &source;
 34 | 	hdr.msg_namelen = sizeof(source);
 35 | }
 35 | 
 36 | /**
 37 |  * homa::receiver::~receiver() - Destructor for homa::receivers. The main
 38 |  * purpose of this destructor is to return any residual buffers to Homa.
 39 |  */
 40 | homa::receiver::~receiver()
 41 | {
 42 | 	release();
 43 | }
 44 | 
 45 | /**
 46 |  * homa::receiver::copy_out() - Copy data out of the current message.
 47 |  * @dest:     Data will be copied here; never receives more than @count bytes.
 48 |  * @offset:   Offset within the message of the first byte to copy.
 49 |  * @count:    Max bytes to copy; clipped at the end of the message.
 50 |  */
 51 | void homa::receiver::copy_out(void *dest, size_t offset, size_t count) const
 52 | {
 53 | 	char *cdest = static_cast<char *>(dest);
 54 | 	ssize_t limit = offset + count;
 55 | 
 56 | 	if (limit > msg_length)
 57 | 		limit = msg_length;
 58 | 	while (static_cast<ssize_t>(offset) < limit) {
 59 | 		size_t chunk_size = contiguous(offset);
 60 | 
 61 | 		if (chunk_size > limit - offset)
 62 | 			chunk_size = limit - offset;
 63 | 		memcpy(cdest, get<char>(offset), chunk_size);
 64 | 		offset += chunk_size;
 65 | 		cdest += chunk_size;
 66 | 	}
 67 | }
 68 | 
 69 | /**
 70 |  * homa::receiver::receive() - Release resources for the current message, if
 71 |  * any, and receive a new incoming message.
 72 |  * @flags:    Various OR'ed bits such as HOMA_RECVMSG_REQUEST and
 73 |  *            HOMA_RECVMSG_NONBLOCKING. See the Homa documentation
 74 |  *            for the flags field of recvmsg for details.
 75 |  * @id:       Identifier of a particular RPC whose result is desired,
 76 |  *            or 0. See the Homa documentation for the id field of
 77 |  *            recvmsg for details.
 78 |  * Return:    The length of the new active message. If an error occurs, -1
 79 |  *            (converted to size_t) is returned, length() is negative, and
 80 |  *            errno describes the error. Note: if id() returns a nonzero
 81 |  *            result after an error, it means that that RPC has now
 82 |  *            completed with an error described by errno.
 83 |  */
 84 | size_t homa::receiver::receive(int flags, uint64_t id)
 85 | {
 86 | 	control.flags = flags;
 87 | 	control.id = id;
 88 | 	hdr.msg_namelen = sizeof(source);
 89 | 	hdr.msg_controllen = sizeof(control);
 90 | 	msg_length = recvmsg(fd, &hdr, 0);
 91 | 
 92 | 	/* Nothing to release after an error; control.id is left for id(). */
 93 | 	if (msg_length < 0)
 94 | 		control.num_bpages = 0;
 95 | 	return msg_length;
 96 | }
 97 | 
 98 | /**
 99 |  * homa::receiver::release() - Release any resources associated with the
100 |  * current message, if any. The current message must not be accessed again
101 |  * until receive has returned successfully.
102 |  */
103 | void homa::receiver::release()
104 | {
105 | 	if (control.num_bpages == 0)
106 | 		return;
107 | 
108 | 	/* This recvmsg request will do nothing except return buffer space.
109 | 	 * The length fields of hdr are value-result, so they must be
110 | 	 * reinitialized before every call (just as receive does).
111 | 	 */
112 | 	control.flags = HOMA_RECVMSG_NONBLOCKING;
113 | 	control.id = 0;
114 | 	hdr.msg_namelen = sizeof(source);
115 | 	hdr.msg_controllen = sizeof(control);
116 | 	recvmsg(fd, &hdr, 0);
117 | 	control.num_bpages = 0;
118 | 	msg_length = -1;
119 | }


--------------------------------------------------------------------------------
/homa_receiver.h:
--------------------------------------------------------------------------------
  1 | /* SPDX-License-Identifier: BSD-2-Clause */
  2 | 
  3 | #pragma once
  4 | 
  5 | #include <sys/socket.h>
  6 | #include <sys/types.h>
  7 | 
  8 | extern "C" {
  9 | #include "homa.h"
 10 | }
 11 | 
 12 | namespace homa {
 13 | /* Helper class for receiving a series of messages from a Homa socket. This
 14 |  * class serves two purposes: first, it implements the application side of
 15 |  * the Homa buffer management protocol, returning receive buffer space to
 16 |  * Homa when the application no longer needs it. Second, it provides convenience
 17 |  * methods for accessing messages that are scattered over several discontiguous
 18 |  * regions of buffer space.
 19 |  *
 20 |  * Typical usage:
 21 |  * - Call receive, which will invoke Homa to receive an incoming message.
 22 |  * - Access the message using methods such as get and copy_out (note: if
 23 |  *   the message is shorter than HOMA_BPAGE_SIZE then it will be contiguous).
 24 |  * - Call receive to get the next message. This releases all of the resources
 25 |  *   associated with the previous message, so you can no longer access that.
 26 |  * - Access the new message ...
 27 |  *
 28 |  * A single homa::receiver allows only a single active incoming message
 29 |  * at a time. However, you can create multiple homa::receivers for the
 30 |  * same Homa socket, each of which can have one active message. An
 31 |  * individual homa::receiver is not thread-safe.
 32 |  */
 33 | class receiver {
 34 | public:
 35 | 	receiver(int fd, void *buf_region);
 36 | 	~receiver();
 37 | 
 38 | 	/**
 39 | 	 * homa::receiver::contiguous() - Return a count of the number
 40 | 	 * of contiguous bytes that are available in the current message
 41 | 	 * at a given offset. Zero is returned if there is no current message
 42 | 	 * or the offset is beyond the end of the message.
 43 | 	 * @offset:  An offset from the beginning of the current message.
 44 | 	 */
 45 | 	inline size_t contiguous(size_t offset) const
 46 | 	{
 47 | 		if (static_cast<ssize_t>(offset) >= msg_length)
 48 | 			return 0;
 49 | 		if ((offset >> HOMA_BPAGE_SHIFT) == (control.num_bpages - 1))
 50 | 			return msg_length - offset;
 51 | 		return HOMA_BPAGE_SIZE - (offset & (HOMA_BPAGE_SIZE - 1));
 52 | 	}
 53 | 
 54 | 	/**
 55 | 	 * homa::receiver::completion_cookie() - Return the completion
 56 | 	 * cookie associated with the current message; result is undefined
 57 | 	 * if there is no current message.
 58 | 	 */
 59 | 	uint64_t completion_cookie(void) const
 60 | 	{
 61 | 		return control.completion_cookie;
 62 | 	}
 63 | 
 64 | 	void copy_out(void *dest, size_t offset, size_t count) const;
 65 | 
 66 | 	/**
 67 | 	 * homa::receiver::get() - Make part of the current message
 68 | 	 * accessible.
 69 | 	 * @offset:   Offset within the message of the first byte of an object
 70 | 	 *            of type T
 71 | 	 * @storage:  Pointer to a memory region containing at least sizeof(T)
 72 | 	 *            bytes. If the desired object's bytes are not currently in
 73 | 	 *            contiguous storage in the message, and if this argument
 74 | 	 *            is non-null, information is copied out of the message
 75 | 	 *            into this object so that it is contiguous.
 76 | 	 * Return:    A pointer to the desired object (either in the message
 77 | 	 *            or at *storage), or nullptr if the object could not be
 78 | 	 *            returned (because it extended beyond the end of the
 79 | 	 *            message, or it wasn't contiguous and storage was nullptr)
 80 | 	 */
 81 | 	template<typename T>
 82 | 	inline T* get(size_t offset, T* storage = nullptr) const {
 83 | 		int buf_num = offset >> HOMA_BPAGE_SHIFT;
 84 | 
 85 | 		if (static_cast<ssize_t>(offset + sizeof(T)) > msg_length)
 86 | 			return nullptr;
 87 | 		if (contiguous(offset) >= sizeof(T))
 88 | 			return reinterpret_cast<T*>(buf_region
 89 | 					+ control.bpage_offsets[buf_num]
 90 | 					+ (offset & (HOMA_BPAGE_SIZE - 1)));
 91 | 		if (storage)
 92 | 			copy_out(storage, offset, sizeof(T));
 93 | 		return storage;
 94 | 	}
 95 | 
 96 | 	/**
 97 | 	 * homa::receiver::id() - Return the Homa RPC identifier for the
 98 | 	 * current message, or 0 if there is no current message.
 99 | 	 */
100 | 	inline uint64_t id(void) const
101 | 	{
102 | 		return control.id;
103 | 	}
104 | 
105 | 	/**
106 | 	 * homa::receiver::is_request() - Return true if the current message
107 | 	 * is a request, and false if it is a response or if there is no
108 | 	 * current message. The test is the low-order bit of the RPC id.
109 | 	 */
110 | 	bool is_request(void) const
111 | 	{
112 | 		return control.id & 1;
113 | 	}
114 | 
115 | 	/**
116 | 	 * homa::receiver::length() - Return the total number of bytes in
117 | 	 * the current message, or a negative value if there is no current
118 | 	 * message.
119 | 	 */
120 | 	ssize_t length(void) const
121 | 	{
122 | 		return msg_length;
123 | 	}
124 | 
125 | 	size_t receive(int flags, uint64_t id);
126 | 	void release(void);
127 | 
128 | 	/**
129 | 	 * homa::receiver::src_addr() - Return a pointer to the address
130 | 	 * of the sender of the current message. The result is undefined
131 | 	 * if there is no current message.
132 | 	 */
133 | 	const struct sockaddr *src_addr(void) const
134 | 	{
135 | 		return &source.sa;
136 | 	}
137 | 
138 | protected:
139 | 	/** @fd: File descriptor for an open Homa socket. */
140 | 	int fd;
141 | 
142 | 	/** @hdr: Used to pass information to the recvmsg system call. */
143 | 	struct msghdr hdr;
144 | 
145 | 	/**
146 | 	 * @control: Additional Homa-specific information passed to the
147 | 	 * recvmsg system call through hdr.msg_control. Note: if
148 | 	 * num_bpages != 0, it means this contains buffers from a previous
149 | 	 * message that must be returned to Homa.
150 | 	 */
151 | 	struct homa_recvmsg_args control;
152 | 
153 | 	/** @source: Address of the node that sent the current message. */
154 | 	union {
155 | 		struct sockaddr sa;
156 | 		struct sockaddr_in in4;
157 | 		struct sockaddr_in6 in6;
158 | 	} source;
159 | 
160 | 	/** @msg_length: Length of the current message, or < 0 if none. */
161 | 	ssize_t msg_length;
162 | 
163 | 	/** @buf_region: First byte of buffer space for this message. */
164 | 	char *buf_region;
165 | };
166 | 
167 | }    // namespace homa
168 | 


--------------------------------------------------------------------------------
/homa_skb.h:
--------------------------------------------------------------------------------
  1 | /* SPDX-License-Identifier: BSD-2-Clause */
  2 | 
  3 | /* This file contains definitions related to efficient management of
  4 |  * memory associated with transmit sk_buffs.
  5 |  */
  6 | 
  7 | #ifndef _HOMA_SKB_H
  8 | #define _HOMA_SKB_H
  9 | 
 10 | #include <linux/percpu-defs.h>
 11 | 
 12 | /**
 13 |  * define HOMA_SKB_PAGE_ORDER - exponent (power of two) determining how
 14 |  * many pages to allocate in a high-order page for skb pages (e.g.,
 15 |  * 2 means allocate in units of 4 pages).
 16 |  */
 17 | #define HOMA_SKB_PAGE_ORDER 4
 18 | 
 19 | /**
 20 |  * define HOMA_SKB_PAGE_SIZE - number of bytes corresponding to HOMA_SKB_PAGE_ORDER.
 21 |  */
 22 | #define HOMA_SKB_PAGE_SIZE (PAGE_SIZE << HOMA_SKB_PAGE_ORDER)
 23 | 
 24 | /**
 25 |  * struct homa_page_pool - A cache of free pages available for use in tx skbs.
 26 |  * Each page is of size HOMA_SKB_PAGE_SIZE, and a pool is dedicated for
 27 |  * use by a single NUMA node. Access to these objects is synchronized with
 28 |  * @homa->page_pool_mutex.
 29 |  */
 30 | struct homa_page_pool {
 31 | 	/** @avail: Number of free pages currently in the pool. */
 32 | 	int avail;
 33 | 
 34 | 	/**
 35 | 	 * @low_mark: Low water mark: smallest value of avail since the
 36 | 	 * last time homa_skb_release_pages reset it.
 37 | 	 */
 38 | 	int low_mark;
 39 | 
 40 | #define HOMA_PAGE_POOL_SIZE 1000
 41 | 
 42 | 	/**
 43 | 	 * @pages: Pointers to pages that are currently free (at most
 44 | 	 * HOMA_PAGE_POOL_SIZE); the ref count is 1 in each of these pages.
 45 | 	 */
 46 | 	struct page *pages[HOMA_PAGE_POOL_SIZE];
 47 | };
 48 | 
 49 | /**
 50 |  * struct homa_skb_core - Stores core-specific information related to
 51 |  * sk_buff allocation. All values are assumed to be zero initially.
 52 |  */
 53 | struct homa_skb_core {
 54 | 	/**
 55 | 	 * @pool: NUMA-specific page pool from which to allocate skb pages
 56 | 	 * for this core.
 57 | 	 */
 58 | 	struct homa_page_pool *pool;
 59 | 
 60 | 	/**
 61 | 	 * @skb_page: a page of data currently being used for skb frags.
 62 | 	 * This pointer is included in the page's reference count.
 63 | 	 */
 64 | 	struct page *skb_page;
 65 | 
 66 | 	/**
 67 | 	 * @page_inuse: offset of first byte in @skb_page that hasn't already
 68 | 	 * been allocated.
 69 | 	 */
 70 | 	int page_inuse;
 71 | 
 72 | 	/** @page_size: total number of bytes available in @skb_page. */
 73 | 	int page_size;
 74 | 
 75 | 	/* Maximum number of stashed pages that can be consumed by a message
 76 | 	 * of a given size (assumes page_inuse is 0). This is a rough guess,
 77 | 	 * since it doesn't consider all of the data_segments that will be
 78 | 	 * needed for the packets. Computed as size / HOMA_SKB_PAGE_SIZE,
 79 | 	 * rounded up. */
 80 | #define HOMA_MAX_STASHED(size) ((((size) - 1) / HOMA_SKB_PAGE_SIZE) + 1)
 81 | 
 82 | 	/**
 83 | 	 * @num_stashed_pages: number of pages currently available in
 84 | 	 * stashed_pages.
 85 | 	 */
 86 | 	int num_stashed_pages;
 87 | 
 88 | 	/**
 89 | 	 * @stashed_pages: used to prefetch from the cache all of the pages a
 90 | 	 * message will need with a single operation, to avoid having to
 91 | 	 * synchronize separately for each page. Note: these pages are all
 92 | 	 * HOMA_SKB_PAGE_SIZE in length.
 93 | 	 */
 94 | 	struct page *stashed_pages[HOMA_MAX_STASHED(HOMA_MAX_MESSAGE_LENGTH)];
 95 | };
 96 | DECLARE_PER_CPU(struct homa_skb_core, homa_skb_core);
 97 | 
 98 | int      homa_skb_append_from_iter(struct homa *homa,
 99 | 				   struct sk_buff *skb, struct iov_iter *iter,
100 | 				   int length);
101 | int      homa_skb_append_from_skb(struct homa *homa,
102 | 				  struct sk_buff *dst_skb,
103 | 				  struct sk_buff *src_skb, int offset,
104 | 				  int length);
105 | int      homa_skb_append_to_frag(struct homa *homa, struct sk_buff *skb,
106 | 				 void *buf, int length);
107 | void     homa_skb_cache_pages(struct homa *homa, struct page **pages,
108 | 			      int count);
109 | void     homa_skb_cleanup(struct homa *homa);
110 | void    *homa_skb_extend_frags(struct homa *homa, struct sk_buff *skb,
111 | 			       int *length);
112 | void     homa_skb_free_tx(struct homa *homa, struct sk_buff *skb);
113 | void     homa_skb_free_many_tx(struct homa *homa, struct sk_buff **skbs,
114 | 			       int count);
115 | void     homa_skb_get(struct sk_buff *skb, void *dest, int offset,
116 | 		      int length);
117 | int      homa_skb_init(struct homa *homa);
118 | struct sk_buff *homa_skb_new_tx(int length);
119 | bool     homa_skb_page_alloc(struct homa *homa,
120 | 			     struct homa_skb_core *core);
121 | void     homa_skb_release_pages(struct homa *homa);
122 | void     homa_skb_stash_pages(struct homa *homa, int length);
123 | 
124 | #endif /* _HOMA_SKB_H */
125 | 


--------------------------------------------------------------------------------
/homa_stub.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-License-Identifier: BSD-2-Clause */
 2 | 
 3 | /* This file contains stripped-down replacements for functions that
 4 |  * have been temporarily removed from Homa during the Linux upstreaming
 5 |  * process. By the time upstreaming is complete this file will
 6 |  * have gone away.
 7 |  */
 8 | 
 9 | #ifndef _HOMA_STUB_H
10 | #define _HOMA_STUB_H
11 | 
12 | #include "homa_impl.h"
13 | 
14 | /* Stub: extend @skb by @length bytes and fill them in from @iter. */
15 | static inline int homa_skb_append_from_iter(struct homa *homa,
16 | 					    struct sk_buff *skb,
17 | 					    struct iov_iter *iter, int length)
18 | {
19 | 	void *tail = skb_put(skb, length);
20 | 
21 | 	/* Anything other than a complete copy is reported as -EFAULT. */
22 | 	return copy_from_iter(tail, length, iter) == length ? 0 : -EFAULT;
23 | }
24 | 
25 | /* Stub: extend @skb by @length bytes and copy them in from @buf. */
26 | static inline int homa_skb_append_to_frag(struct homa *homa,
27 | 					  struct sk_buff *skb, void *buf,
28 | 					  int length)
29 | {
30 | 	/* The value returned by skb_put is where the new bytes go. */
31 | 	memcpy(skb_put(skb, length), buf, length);
32 | 	return 0;
33 | }
34 | 
35 | /* Stub: append bytes located @offset past @src_skb's transport header. */
36 | static inline int homa_skb_append_from_skb(struct homa *homa,
37 | 		struct sk_buff *dst_skb, struct sk_buff *src_skb,
38 | 		int offset, int length)
39 | {
40 | 	return homa_skb_append_to_frag(homa, dst_skb,
41 | 			offset + skb_transport_header(src_skb), length);
42 | }
43 | 
44 | static inline void homa_skb_free_tx(struct homa *homa, struct sk_buff *skb)
45 | {
46 | 	kfree_skb(skb);	/* Stub: plain free; the @homa argument is unused. */
47 | }
48 | 
49 | /* Stub: free the first @count skbs in @skbs, one at a time, in order. */
50 | static inline void homa_skb_free_many_tx(struct homa *homa,
51 | 					 struct sk_buff **skbs, int count)
52 | {
53 | 	/* Both parameters are local copies, so they serve as cursors. */
54 | 	while (count-- > 0)
55 | 		kfree_skb(*skbs++);
56 | }
57 | 
58 | static inline void homa_skb_get(struct sk_buff *skb, void *dest, int offset,
59 | 				int length)
60 | {	/* Stub: copies @length bytes from @offset past the transport header. */
61 | 	memcpy(dest, skb_transport_header(skb) + offset, length);
62 | }
63 | 
64 | /* Stub: allocate a tx skb sized for @length bytes plus header/info space. */
65 | static inline struct sk_buff *homa_skb_new_tx(int length)
66 | {
67 | 	const int headroom = HOMA_SKB_EXTRA + HOMA_IPV6_HEADER_LENGTH;
68 | 	struct sk_buff *skb = alloc_skb(headroom + length +
69 | 					sizeof(struct homa_skb_info), GFP_KERNEL);
70 | 
71 | 	if (unlikely(!skb))
72 | 		return NULL;
73 | 	skb_reserve(skb, headroom);
74 | 	skb_reset_transport_header(skb);
75 | 	return skb;
76 | }
77 | 
78 | static inline void homa_skb_stash_pages(struct homa *homa, int length)
79 | {}	/* Stub: does nothing; both arguments are ignored. */
80 | 
81 | #endif /* _HOMA_STUB_H */
82 | 


--------------------------------------------------------------------------------
/man/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile to build man pages for Homa.
 2 | 
 3 | # If a recipe fails, delete its (possibly truncated) target so that it
 4 | # doesn't look up to date on the next run.
 5 | .DELETE_ON_ERROR:
 6 | 
 7 | SRCS := homa.7 \
 8 |         homa_abort.3 \
 9 |         homa_reply.3 \
10 |         homa_send.3 \
11 |         recvmsg.2 \
12 |         sendmsg.2
13 | 
14 | # Each man page foo.N produces foo.pdf, whatever its section number N.
15 | PDFS := $(addsuffix .pdf,$(basename $(SRCS)))
16 | 
17 | # "all" and "clean" are commands, not files.
18 | .PHONY: all clean
19 | 
20 | all: $(PDFS)
21 | 
22 | clean:
23 | 	rm -f *.pdf *.pdf.tmp.ps
24 | 
25 | # Note: in the rules below, it doesn't seem to work to eliminate the
26 | # temporary file and use ps2pdf in a pipeline; as of 12/2024, under
27 | # Cygwin, this produces blank output for some man pages under some
28 | # conditions. The temporary file is named after the target so that
29 | # rules running concurrently (make -j) don't overwrite each other's
30 | # PostScript.
31 | %.pdf: %.2
32 | 	groff -man -Tps $< > $@.tmp.ps
33 | 	ps2pdf $@.tmp.ps $@
34 | 	rm -f $@.tmp.ps
35 | 
36 | %.pdf: %.3
37 | 	groff -man -Tps $< > $@.tmp.ps
38 | 	ps2pdf $@.tmp.ps $@
39 | 	rm -f $@.tmp.ps
40 | 
41 | %.pdf: %.7
42 | 	groff -man -Tps $< > $@.tmp.ps
43 | 	ps2pdf $@.tmp.ps $@
44 | 	rm -f $@.tmp.ps
45 | 
46 | # The following target is useful for debugging Makefiles; it
47 | # prints the value of a make variable.
48 | print-%:
49 | 	@echo $* = $($*)


--------------------------------------------------------------------------------
/man/homa_abort.3:
--------------------------------------------------------------------------------
 1 | .TH HOMA_ABORT 3 2022-9-15 "Homa" "Linux Programmer's Manual"
 2 | .SH NAME
 3 | homa_abort \- terminate an outgoing RPC
 4 | .SH SYNOPSIS
 5 | .nf
 6 | .B #include <homa.h>
 7 | .PP
 8 | .BI "int homa_abort(int " sockfd ", uint64_t " id ", int " error );
 9 | .PP
10 | .BI "int homa_abortp(int " sockfd ", struct homa_abort_args *" args );
11 | .fi
12 | .SH DESCRIPTION
13 | These two functions will cancel the execution of one (or all) outgoing RPCs.
14 | They behave identically except that
15 | .BR homa_abort
16 | receives its arguments as separate parameters, whereas
17 | .BR homa_abortp
18 | packs all of the arguments into a structure:
19 | .PP
20 | .in +4n
21 | .ps -1
22 | .vs -2
23 | .EX
24 | struct homa_abort_args {
25 |     uint64_t id;
26 |     int error;
27 | };
28 | .EE
29 | .vs +2
30 | .ps +1
31 | .in
32 | .PP
33 | The
34 | .I id
35 | argument contains the identifier for an RPC; if this RPC is active on
36 | .IR sockfd
37 | then it is aborted.
38 | If
39 | .I id
40 | is 0 then all outgoing RPCs on
41 | .IR sockfd
42 | will be aborted.
43 | If
44 | .I error
45 | is 0, then the matching RPCs will be deleted and all state associated
46 | with them will be freed (the RPCs will not
47 | be returned by
48 | .BR homa_recv ).
49 | If
50 | .I error
51 | is nonzero, then the RPC(s) will immediately be placed in the completed
52 | state so that they can be returned by
53 | .BR homa_recv ;
54 | the
55 | .BR homa_recv
56 | call will return an error, with an
57 | .I errno
58 | value of
59 | .IR error .
60 | Regardless of whether the RPC(s) are completed or freed, the
61 | servers for the RPCs
62 | are not notified of the abort. If a
63 | request has already been transmitted to the server at the time
64 | .B homa_abort
65 | is invoked, it may still be executed on the server. Any response
66 | from the server will be discarded.
67 | 
68 | .SH RETURN VALUE
69 | On success, the return value is 0.
70 | On error, \-1 is returned and
71 | .I errno
72 | is set appropriately.
73 | 
74 | .SH ERRORS
75 | .TP
76 | .B EALREADY
77 | .I error
78 | and
79 | .I id
80 | were both nonzero, but the RPC was already in the completed state. In this
81 | case the system call has no effect.
82 | .TP
83 | .B EFAULT
84 | An invalid user space address was specified for an argument.
85 | .TP
86 | .B EINVAL
87 | There is no RPC corresponding to
88 | .IR id .
89 | .SH SEE ALSO
90 | .BR homa_recv (3),
91 | .BR homa_reply (3),
92 | .BR homa_send (3),
93 | .BR homa (7)
94 | 


--------------------------------------------------------------------------------
/man/homa_reply.3:
--------------------------------------------------------------------------------
 1 | .TH HOMA_REPLY 3 2024-11-11 "Homa" "Linux Programmer's Manual"
 2 | .SH NAME
 3 | homa_reply, homa_replyv \- send a Homa response message
 4 | .SH SYNOPSIS
 5 | .nf
 6 | .B #include <homa.h>
 7 | .PP
 8 | .BI "int homa_reply(int " sockfd ", const void *" message_buf ", size_t " \
 9 | length ,
10 | .BI "               const struct sockaddr *" dest_addr ", size_t " \
11 | addrlen  ,
12 | .BI "               uint64_t " id );
13 | .PP
14 | .BI "int homa_replyv(int " sockfd ", const struct iovec *" iov ", size_t " \
15 | iovcnt ,
16 | .BI "               const struct sockaddr *" dest_addr ", size_t " \
17 | addrlen  ,
18 | .BI "               uint64_t " id );
19 | .fi
20 | .SH DESCRIPTION
21 | .BR homa_reply
22 | and
23 | .BR homa_replyv
24 | are convenience functions layered on top of the
25 | .B sendmsg
26 | system call.
27 | Either may be used to transmit a response message using the Homa
28 | transport protocol.
29 | The argument
30 | .I sockfd
31 | is the file descriptor of a Homa socket to use for sending the response.
32 | With
33 | .BR homa_reply
34 | the response message is stored in a single contiguous buffer pointed to by
35 | .IR message_buf ,
36 | and the argument
37 | .I length
38 | gives the length of the message in bytes.
39 | With
40 | .BR homa_replyv
41 | the response message consists of multiple disjoint chunks, specified
42 | by
43 | .I iovcnt
44 | descriptors at
45 | .IR iov .
46 | In either case the total message length must not exceed
47 | .BR HOMA_MAX_MESSAGE_LENGTH .
48 | The destination for the response is given by
49 | .IR dest_addr ,
50 | which can hold either an IPv4 or an IPv6 address. The length
51 | of the address is given by
52 | .IR addrlen .
53 | The argument
54 | .I id
55 | is an identifier previously returned by
56 | .BR recvmsg (2);
57 | along with
58 | .IR dest_addr ,
59 | it identifies the request for which this message is the response.
60 | .PP
61 | This function returns as soon as the response has been queued for
62 | transmission.
63 | .SH RETURN VALUE
64 | On success, the return value is 0.
65 | On error, \-1 is returned and
66 | .I errno
67 | is set appropriately.
68 | .SH ERRORS
69 | See
70 | .BR sendmsg (2)
71 | for details on the
72 | .I errno
73 | values returned after errors.
74 | .SH SEE ALSO
75 | .BR recvmsg (2),
76 | .BR sendmsg (2),
77 | .BR homa_abort (3),
78 | .BR homa_send (3),
79 | .BR homa (7)
80 | 


--------------------------------------------------------------------------------
/man/homa_send.3:
--------------------------------------------------------------------------------
 1 | .TH HOMA_SEND 3 2024-11-11 "Homa" "Linux Programmer's Manual"
 2 | .SH NAME
 3 | homa_send, homa_sendv \- send a request message
 4 | .SH SYNOPSIS
 5 | .nf
 6 | .B #include <homa.h>
 7 | .PP
 8 | .BI "int homa_send(int " sockfd ", const void *" message_buf ", size_t " length \
 9 | ", const struct sockaddr *" dest_addr ",
10 | .BI "              size_t " addrlen ", uint64_t *" id ", uint64_t " \
11 | "completion_cookie" );
12 | .PP
13 | .BI "int homa_sendv(int " sockfd ", const struct iovec *" iov ", size_t " \
14 | iovcnt ", const struct sockaddr *" dest_addr ,
15 | .BI "              size_t " addrlen ", uint64_t *" id ", uint64_t " \
16 | "completion_cookie" );
17 | .fi
18 | .SH DESCRIPTION
19 | .BR homa_send
20 | and
21 | .BR homa_sendv
22 | are convenience functions layered on top of the
23 | .B sendmsg
24 | system call.
25 | Either may be used to transmit a request message using the Homa
26 | transport protocol.
27 | The argument
28 | .I sockfd
29 | is the file descriptor of the sending socket; this must be a Homa socket.
30 | With
31 | .BR homa_send
32 | the request message is stored in a single contiguous buffer pointed to by
33 | .IR message_buf ,
34 | and the argument
35 | .I length
36 | gives the length of the message in bytes.
37 | With
38 | .BR homa_sendv
39 | the request message consists of multiple disjoint chunks, specified
40 | by
41 | .I iovcnt
42 | descriptors at
43 | .IR iov .
44 | In either case, the total message length must not exceed
45 | .BR HOMA_MAX_MESSAGE_LENGTH .
46 | The destination socket for the request is given by
47 | .IR dest_addr ,
48 | which can hold either an IPv4 or IPv6 address. The length of
49 | the address is given by
50 | .IR addrlen .
51 | If
52 | .I id
53 | is not NULL, an identifier for the request is returned at
54 | .IR *id .
55 | The identifier will be unique among all requests issued on
56 | .IR sockfd ,
57 | and can be used to match the request with a response returned later by
58 | .BR homa_reply (3).
59 | The
60 | .I completion_cookie
61 | argument provides application-specific identifying information about the RPC,
62 | such as the address of a data structure used to manage the
63 | RPC; it will be returned by
64 | .BR homa_recv
65 | when the RPC completes.
66 | .PP
67 | This function returns as soon as the message has been queued for
68 | transmission.
69 | 
70 | .SH RETURN VALUE
71 | On success, the return value is 0 and an identifier for the request
72 | is stored in
73 | .I *id
74 | (if
75 | .I id
76 | is not NULL).
77 | The  identifier can be used later to match the request
78 | with the corresponding response, using
79 | .BR homa_reply (3).
80 | On error, \-1 is returned and
81 | .I errno
82 | is set appropriately.
83 | .SH ERRORS
84 | After an error return,
85 | .I errno
86 | will contain additional information about the cause of the error.
87 | See
88 | .BR sendmsg (2)
89 | for details.
90 | .SH SEE ALSO
91 | .BR recvmsg (2),
92 | .BR sendmsg (2),
93 | .BR homa_abort (3),
94 | .BR homa_reply (3),
95 | .BR homa (7)
96 | 


--------------------------------------------------------------------------------
/man/sendmsg.2:
--------------------------------------------------------------------------------
  1 | .TH SENDMSG 2 2023-11-2 "Homa" "Linux Programmer's Manual"
  2 | .SH NAME
  3 | sendmsg \- send a Homa request or response message
  4 | .SH SYNOPSIS
  5 | .nf
  6 | .B #include <homa.h>
  7 | .PP
  8 | .BI "ssize_t sendmsg(int " sockfd ", const struct msghdr *" msg ", int " flags );
  9 | .fi
 10 | .SH DESCRIPTION
 11 | The
 12 | .B sendmsg
 13 | kernel call is used to send request and response messages on Homa sockets.
 14 | The
 15 | .I sockfd
 16 | argument must refer to a Homa socket. The
 17 | .I msg
 18 | argument describes the message to send and the destination where it
 19 | should be sent (more details below). The
 20 | .I flags
 21 | argument is not used for Homa messages.
 22 | .PP
 23 | The
 24 | .B msg
 25 | argument must point to a structure of the following type:
 26 | .PP
 27 | .in +4n
 28 | .ps -1
 29 | .vs -2
 30 | .EX
 31 | struct msghdr {
 32 |     void         *msg_name;       /* Address of destination (sockaddr_in
 33 |                                    * or sockaddr_in6). */
 34 |     socklen_t     msg_namelen;    /* Size of address. */
 35 |     struct iovec *msg_iov;        /* Message contents: one or more extents. */
 36 |     size_t        msg_iovlen;     /* Number of elements in msg_iov. */
 37 |     void         *msg_control;    /* Address of homa_sendmsg_args struct. */
 38 |     size_t        msg_controllen; /* Must always be zero (if not, sendmsg will
 39 |                                    * fail with EINVAL, for arcane reasons). */
 40 |     int           msg_flags;      /* Not used by Homa. */
 41 | };
 42 | .EE
 43 | .vs +2
 44 | .ps +1
 45 | .in
 46 | .PP
 47 | Homa requires additional information beyond what can be represented in a
 48 | .BR "struct msghdr" ,
 49 | so the
 50 | .B msg_control
 51 | field must refer to a structure of the following type:
 52 | .PP
 53 | .in +4n
 54 | .ps -1
 55 | .vs -2
 56 | .EX
 57 | struct homa_sendmsg_args {
 58 |     uint64_t id;                  /* RPC identifier. */
 59 |     uint64_t completion_cookie;   /* For requests only; value to return
 60 |                                    * along with response. */
 61 | };
 62 | .EE
 63 | .vs +2
 64 | .ps +1
 65 | .in
 66 | .PP
 67 | If the
 68 | .B id
 69 | field of the
 70 | .B homa_sendmsg_args
 71 | is initially 0, then a new RPC will be created and a request message
 72 | will be sent as described by
 73 | .I msg\c
 74 | ->\c
 75 | .B msg_name
 76 | and
 77 | .IR msg ->\c
 78 | .BR msg_iov ;
 79 | the
 80 | .B id
 81 | field will be overwritten with the identifier of the new RPC, which is
 82 | unique among all RPCs initiated via
 83 | .IR sockfd .
 84 | This identifier can be used to wait for the response with
 85 | .BR recvmsg .
 86 | If
 87 | .B id
 88 | is nonzero, then a response message will be sent for the RPC identified
 89 | uniquely by
 90 | .B id
 91 | and
 92 | .IR msg ->\c
 93 | .BR msg_name .
 94 | .PP
 95 | .B sendmsg
 96 | returns as soon as the message has been queued for transmission.
 97 | .SH RETURN VALUE
 98 | The return value is 0 for success and -1 if an error occurred.
 99 | .SH ERRORS
100 | .PP
101 | When
102 | .B sendmsg
103 | returns an error,
104 | .I errno
105 | will have one of the following values:
106 | .TP
107 | .B EAFNOSUPPORT
108 | The address family specified by
109 | \fImsg\fR->\fBmsg_name\fR
110 | does not match the address family specified when the socket was opened
111 | .RB ( AF_INET
112 | or
113 | .BR AF_INET6 ).
114 | .TP
115 | .B EBADF
116 | .I sockfd
117 | is not a valid open file descriptor.
118 | .TP
119 | .B EFAULT
120 | An invalid user space address was specified for an argument.
121 | .TP
122 | .B EINVAL
123 | The size of the request exceeded
124 | .BR HOMA_MAX_MESSAGE_LENGTH ", or"
125 | .I sockfd
126 | was not a Homa socket, or a nonzero completion cookie was specified
127 | for a response message, or the
128 | .B id
129 | for a response message does not match an existing RPC for which a
130 | request message has been received.
131 | .TP
132 | .B ENOMEM
133 | Memory could not be allocated for internal data structures needed
134 | for the message.
135 | .TP
136 | .B ESHUTDOWN
137 | The socket has been disabled using
138 | .BR shutdown (2).
139 | .SH SEE ALSO
140 | .BR recvmsg (2),
141 | .BR homa_abort (3),
142 | .BR homa_reply (3),
143 | .BR homa_send (3),
144 | .BR homa (7)
145 | 


--------------------------------------------------------------------------------
/perf/README.md:
--------------------------------------------------------------------------------
1 | This directory contains various performance measurements of the
2 | Linux kernel implementation of the Homa transport protocol.
3 | 


--------------------------------------------------------------------------------
/perf/plot_length_cdf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | # Copyright (c) 2019-2020 Homa Developers
  4 | # SPDX-License-Identifier: BSD-1-Clause
  5 | 
  6 | # This script generates a plot showing the CDF of message lengths,
  7 | # gathered from one or more experiment runs.
  8 | #
  9 | # Usage: plot_length_cdf.py name pattern name pattern ...
 10 | #
 11 | # Each "name" argument gives the name of a workload, which will appear in
 12 | # the graph keys. Each "pattern" argument gives a glob string (such as
 13 | # "logs/w1/loaded*.txt") describing one or more data files that measure
 14 | # the distribution.
 15 | 
 16 | import glob
 17 | import matplotlib.pyplot as plt
 18 | import numpy as np
 19 | import os
 20 | import string
 21 | import sys
 22 | 
 23 | # Keys are message lengths, values are number of messages of that length.
 24 | counts = {}
 25 | 
 26 | def read_rtts(file, column):
 27 |     """
 28 |     Read file and add its data to the counts array. The "column" argument
 29 |     indicates which argument of each line contains the message length.
 30 |     """
 31 |     global counts
 32 | 
 33 |     print("Reading %s" % file)
 34 |     f = open(file, "r")
 35 |     for line in f:
 36 |         stripped = line.strip()
 37 |         if stripped[0] == '#':
 38 |             continue
 39 |         words = stripped.split()
 40 |         if (len(words) < (column+1)):
 41 |             print("Line too short (no column %d): '%s'" % (line, column))
 42 |             continue
 43 |         size = int(words[column])
 44 |         if size in counts:
 45 |             counts[size] += 1
 46 |         else:
 47 |             counts[size] = 1
 48 |     f.close()
 49 | 
 50 | if (len(sys.argv) < 3) or not (len(sys.argv) & 1):
 51 |     print("Usage: %s name pattern name pattern ..." % (sys.argv[0]))
 52 |     exit(1)
 53 | 
 54 | workloads = []
 55 | for i in range(1, len(sys.argv), 2):
 56 |     info = {}
 57 |     info["name"] = sys.argv[i]
 58 |     pattern = sys.argv[i+1]
 59 | 
 60 |     counts = {}
 61 |     got_data = False
 62 |     for f in glob.glob(pattern):
 63 |         read_rtts(f, 0)
 64 |         got_data = True
 65 |     if not got_data:
 66 |         print("Couldn't find any files corresponding to '%s'" % (pattern))
 67 |         continue
 68 | 
 69 |     info["total_msgs"] = 0.0
 70 |     info["total_bytes"] = 0.0
 71 | 
 72 |     for length in counts:
 73 |         info["total_msgs"] += counts[length]
 74 |         info["total_bytes"] += length*counts[length]
 75 | 
 76 |     lengths = sorted(counts.keys())
 77 |     messages = 0
 78 |     bytes = 0
 79 |     info["x"] = []
 80 |     info["cum_msgs"] = []
 81 |     info["cum_bytes"] = []
 82 |     for l in lengths:
 83 |         info["x"].append(l)
 84 |         info["cum_msgs"].append(messages)
 85 |         info["cum_bytes"].append(bytes)
 86 |         messages += counts[l]/info["total_msgs"]
 87 |         bytes += (l * counts[l])/info["total_bytes"]
 88 |         info["x"].append(l)
 89 |         info["cum_msgs"].append(messages)
 90 |         info["cum_bytes"].append(bytes)
 91 | #       print("Length %d, CF messages %.2f, CF bytes %.2f" % (
 92 | #                 l, messages, bytes))
 93 |     workloads.append(info)
 94 | 
 95 | plt.subplot(211)
 96 | plt.axis([10, 1500000, 0, 1.0])
 97 | plt.xscale("log")
 98 | plt.xlabel("Message Length")
 99 | plt.ylabel("Cum. Frac. Messages")
100 | plt.grid(which="major", axis="both")
101 | 
102 | for w in workloads:
103 |     plt.plot(w["x"], w["cum_msgs"], label=w["name"])
104 | plt.legend()
105 | 
106 | plt.subplot(212)
107 | plt.axis([10, 1500000, 0, 1.0])
108 | plt.xscale("log")
109 | plt.xlabel("Message Length")
110 | plt.ylabel("Cum. Frac. Bytes")
111 | plt.grid(which="major", axis="both")
112 | 
113 | for w in workloads:
114 |     print("Plotting workload %s" % (w["name"]))
115 |     plt.plot(w["x"], w["cum_bytes"], label=w["name"])
116 | plt.legend()
117 | 
118 | plt.savefig('length.pdf')


--------------------------------------------------------------------------------
/perf/rtt.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PlatformLab/HomaModule/c4f579f9a83728baeb638a6f11c15e8f0ddf65d8/perf/rtt.xlsx


--------------------------------------------------------------------------------
/reap.txt:
--------------------------------------------------------------------------------
 1 | This file discusses issues related to freeing resources for completed RPCs
 2 | ("reaping").
 3 | 
 4 | * Most of the cost of reaping comes from freeing skbuffs; this can be
 5 |   quite expensive for RPCs with long messages.
 6 | 
 7 | * The natural time to reap is when homa_rpc_free is invoked to mark an
 8 |   RPC completed, but this can result in severe performance hiccups. For
 9 |   example, a server RPC is freed once the last packet of the response
10 |   has been transmitted, but this can happen in homa_softirq in response
11 |   to an incoming grant, and there may be other short messages waiting
12 |   to be processed. Freeing a long RPC could result in significant delay
13 |   for a subsequent short RPC.
14 | 
15 | * Thus Homa doesn't reap immediately in homa_rpc_free. Instead, dead RPCs
16 |   are queued up and reaping occurs later, at a more convenient time where
17 |   it is less likely to impact latency. The challenge is to figure out how to
18 |   do this so that (a) we keep up with dead RPCs and (b) we minimize
19 |   the impact of reaping on latency.
20 | 
21 | * The ideal time to reap is when threads are waiting for incoming messages
22 |   in homa_wait_for_message. The thread has nothing else to do, so reaping
23 |   can be performed with no latency impact on the application.  However,
24 |   if a machine is overloaded then it may never wait, so this mechanism
25 |   isn't always sufficient.
26 | 
27 | * Homa now reaps in two other places, if homa_wait_for_message can't
28 |   keep up:
29 |   * If dead_buffs_limit dead skbs accumulate, then homa_timer will
30 |     reap to get down to that limit. However, it seems possible that
31 |     there may be cases where a single thread cannot keep up with all
32 |     the reaping to be done.
33 |   * If homa_timer can't keep up, then as a last resort, homa_pkt_dispatch
34 |     will reap a few buffers for every incoming data packet. This is undesirable
35 |     because it will impact Homa's performance.
36 | 
37 | * In addition, during the conversion to the new input buffering scheme for 2.0,
38 |   freeing of packets for incoming messages was moved to homa_copy_to_user,
39 |   under the assumption that this code wouldn't be on the critical path.
40 |   However, right now the packet freeing is taking 20-25% of the total
41 |   time in that function, and with faster networks it's quite possible that
42 |   this code will indeed be on the critical path. So, it probably shouldn't
43 |   be doing packet freeing after all.
44 | 
45 | * Here are some approaches that have been tried and eventually abandoned:
46 |   * Occasionally when data packets arrive, reap if too much dead info has
47 |     accumulated. This will cause a latency impact. The amount to reap is
48 |     chosen dynamically (by homa_timer) to be as small as possible while
49 |     gradually working through the backlog. Unfortunately, the formula for
50 |     computing how much to reap was fragile and resulted in situations where
51 |     the backlog of dead RPCs grew without bound. This approach was abandoned
52 |     in October 2021.
53 | 


--------------------------------------------------------------------------------
/rsync-exclude.txt:
--------------------------------------------------------------------------------
 1 | # This file lists directories and files that should not be copied
 2 | # to rcmaster by rsync.
 3 | .git
 4 | nbproject
 5 | private
 6 | cloudlab
 7 | reports
 8 | *traces*
 9 | bytedance
10 | mle
11 | __pycache__
12 | *.data
13 | *.pyc
14 | *.pdf
15 | *.tt
16 | *.log
17 | tmp*
18 | *.out
19 | erfan
20 | 


--------------------------------------------------------------------------------
/sync.txt:
--------------------------------------------------------------------------------
 1 | This file describes the synchronization strategy used for Homa.
 2 | 
 3 | * In the Linux TCP/IP stack, the primary locking mechanism is a lock
 4 |   per socket. However, per-socket locks aren't adequate for Homa, because
 5 |   sockets are "larger" in Homa. In TCP, a socket corresponds to a single
 6 |   connection between the source and destination; an application can have
 7 |   hundreds or thousands of sockets open at once, so per-socket locks leave
 8 |   lots of opportunities for concurrency. With Homa, a single socket can be
 9 |   used for communicating with any number of peers, so there will typically
10 |   be no more than one socket per thread. As a result, a single Homa socket
11 |   must support many concurrent RPCs efficiently, and a per-socket lock would
12 |   create a bottleneck (Homa tried this approach initially).
13 | 
14 | * Thus, the primary lock used in Homa is a per-RPC spinlock. This allows operations
15 |   on different RPCs to proceed concurrently. RPC locks are actually stored in
16 |   the hash table buckets used to look them up. This is important because it
17 |   makes looking up RPCs and locking them atomic. Without this approach it
18 |   is possible that an RPC could get deleted after it was looked up but before
19 |   it was locked.
20 | 
21 | * Certain operations are not permitted while holding spinlocks, such as memory
22 |   allocation and copying data to/from user space (spinlocks disable
23 |   interrupts, so the holder must not block). RPC locks are spinlocks,
24 |   and that results in awkward code in several places to move prohibited
25 |   operations outside the locked regions. In particular, there is extra
26 |   complexity to make sure that RPCs are not garbage-collected while these
27 |   operations are occurring without a lock.
28 | 
29 | * There are several other locks in Homa besides RPC locks. When multiple
30 |   locks are held, they must always be acquired in a consistent order, in
31 |   order to prevent deadlock. For each lock, here are the other locks that
32 |   may be acquired while holding the given lock.
33 |   * RPC: socket, grantable, throttle, peer->ack_lock
34 |   * Socket: port_map.write_lock
35 |   Any lock not listed above must be a "leaf" lock: no other lock will be
36 |   acquired while holding the lock.
37 | 
38 | * Homa's approach means that socket shutdown and deletion can potentially
39 |   occur while operations are underway that hold RPC locks but not the socket
40 |   lock. This creates several potential problems:
41 |   * A socket might be deleted and its memory reclaimed while an RPC still
42 |     has access to it. Homa assumes that Linux will prevent socket deletion
43 |     while the kernel call is executing. In situations outside kernel call
44 |     handling, Homa uses rcu_read_lock to prevent socket deletion.
45 |   * A socket might be shut down while there are active operations on
46 |     RPCs. For example, a new RPC creation might be underway when a socket
47 |     is shut down, which could add the new RPC after all of its RPCs
48 |     have supposedly been deleted. Handling this requires careful ordering
49 |     of operations during shutdown, plus the rest of Homa must be careful
50 |     never to add new RPCs to a socket that has been shut down.
51 | 
52 | * There are a few places where Homa needs to process RPCs on lists
53 |   associated with a socket, such as the timer. Such code must first lock
54 |   the socket (to synchronize access to the link pointers) then lock
55 |   individual RPCs on the list. However, this violates the rules for locking
56 |   order. It isn't safe to unlock the socket before locking the RPC, because
57 |   the RPC could be deleted and its memory recycled between the unlock of the
58 |   socket lock and the lock of the RPC; this could result in corruption. Homa
59 |   uses a few different ways to handle this situation:
60 |   * Use homa_protect_rpcs to prevent RPC reaping for a socket. RPCs can still
61 |     be deleted, but their memory won't go away until homa_unprotect_rpcs is
62 |     invoked. This allows the socket lock to be released before acquiring
63 |     the RPC lock; after acquiring the RPC lock check to see if it has been
64 |     deleted; if so, skip it.  Note: the Linux RCU mechanism could have been
65 |     used to achieve the same effect, but it results in *very* long delays
66 |     before final reclamation (tens of ms), even without contention, which
67 |     means that a large number of dead RPCs could accumulate.
68 |   * Use spin_trylock_bh to acquire the RPC lock, while still holding the
69 |     socket lock. If this fails, then release the socket lock, then retry
70 |     both the socket lock and the RPC lock.
71 | 
72 | * There are also a few places where Homa is doing something related to an
73 |   RPC (such as copying message data to user space) and needs the RPC to stay
74 |   around, but it isn't holding the RPC lock. In these situations, Homa sets
75 |   a bit in rpc->flags and homa_rpc_reap will not reap RPCs with any of these
76 |   flags set.


--------------------------------------------------------------------------------
/test/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile to run unit tests for Homa
  2 | 
  3 | KDIR ?= /lib/modules/$(shell uname -r)/build
  4 | CC ?= gcc
  5 | CXX ?= g++
  6 | PERL ?= perl
  7 | ARCH ?= x86
  8 | 
  9 | all: test
 10 | .PHONY: all test s_test test_both clean  # command names, not files: always run
 11 | KERN_INCLUDES := \
 12 | 	    -I$(KDIR)/arch/x86/include \
 13 | 	    -I$(KDIR)/arch/x86/include/generated \
 14 | 	    -I$(KDIR)/include \
 15 | 	    -I$(KDIR)/arch/x86/include/uapi \
 16 | 	    -I$(KDIR)/arch/x86/include/generated/uapi \
 17 | 	    -I$(KDIR)/include/uapi \
 18 | 	    -I$(KDIR)/include/generated/uapi
 19 | CINCLUDES := \
 20 | 	    -I. \
 21 | 	    -I.. \
 22 | 	    $(KERN_INCLUDES) \
 23 | 	    -include $(KDIR)/include/linux/kconfig.h
 24 | CCINCLUDES := \
 25 | 	    -I. \
 26 | 	    -I.. \
 27 | 	    $(KERN_KINCLUDES)
 28 | 
 29 | DEFS :=      -D__KERNEL__ \
 30 | 	     -D__UNIT_TEST__ \
 31 | 	     -D KBUILD_MODNAME='"homa"'
 32 | 
 33 | WARNS :=     -Wall -Wundef -Wno-trigraphs -Wno-sign-compare \
 34 | 		-Wno-strict-aliasing -Werror
 35 | CFLAGS :=    $(WARNS) -Wstrict-prototypes -MD -g $(CINCLUDES) $(DEFS)
 36 | CCFLAGS :=   -std=c++11 $(WARNS) -MD -g $(CCINCLUDES) $(DEFS) -fsanitize=address
 37 | 
 38 | TEST_SRCS :=  unit_homa_grant.c \
 39 | 	      unit_homa_incoming.c \
 40 | 	      unit_homa_offload.c \
 41 | 	      unit_homa_metrics.c \
 42 | 	      unit_homa_outgoing.c \
 43 | 	      unit_homa_peer.c \
 44 | 	      unit_homa_pool.c \
 45 | 	      unit_homa_plumbing.c \
 46 | 	      unit_homa_rpc.c \
 47 | 	      unit_homa_skb.c \
 48 | 	      unit_homa_sock.c \
 49 | 	      unit_homa_timer.c \
 50 | 	      unit_homa_utils.c \
 51 | 	      unit_timetrace.c
 52 | TEST_OBJS :=  $(patsubst %.c,%.o,$(TEST_SRCS))
 53 | 
 54 | HOMA_SRCS :=  homa_grant.c \
 55 | 	      homa_incoming.c \
 56 | 	      homa_metrics.c \
 57 | 	      homa_offload.c \
 58 | 	      homa_outgoing.c \
 59 | 	      homa_peer.c \
 60 | 	      homa_pool.c \
 61 | 	      homa_plumbing.c \
 62 | 	      homa_rpc.c \
 63 | 	      homa_skb.c \
 64 | 	      homa_sock.c \
 65 | 	      homa_timer.c \
 66 | 	      homa_utils.c \
 67 | 	      timetrace.c
 68 | HOMA_OBJS :=  $(patsubst %.c,%.o,$(HOMA_SRCS))
 69 | 
 70 | OTHER_SRCS := ccutils.cc \
 71 | 	      main.c \
 72 | 	      mock.c \
 73 | 	      utils.c
 74 | OTHER_OBJS := $(patsubst %.c,%.o,$(patsubst %.cc,%.o,$(OTHER_SRCS)))
 75 | 
 76 | OBJS := $(TEST_OBJS) $(HOMA_OBJS) $(OTHER_OBJS)
 77 | 
 78 | CLEANS = unit $(OBJS) *.d .deps
 79 | 
 80 | # This seems to be the only way to disable the built-in implicit rules
 81 | # for %:%.c and %:%.cc.
 82 | .SUFFIXES:
 83 | 
 84 | %.o: ../%.c
 85 | 	$(CC) -c $(CFLAGS) $< -o $@
 86 | %.e: ../%.c
 87 | 	$(CC) -E $(CFLAGS) $< -o $@
 88 | %.o: %.c
 89 | 	$(CC) -c $(CFLAGS) $< -o $@
 90 | %.e: %.c
 91 | 	$(CC) -E $(CFLAGS) $< -o $@
 92 | %.o: %.cc
 93 | 	$(CXX) -c $(CCFLAGS) $< -o $@
 94 | %.e: %.cc
 95 | 	$(CXX) -E $(CCFLAGS) $< -o $@
 96 | 
 97 | unit: $(OBJS)
 98 | 	$(CXX) $(CFLAGS) $^ -o $@ -lasan
 99 | 
100 | test: unit
101 | 	./unit
102 | 
103 | # Additional definitions for running unit tests using stripped sources.
104 | 
105 | S_HOMA_SRCS := $(patsubst %,stripped/%,$(filter-out timetrace.c, $(HOMA_SRCS)))
106 | S_HOMA_OBJS :=  $(patsubst %.c,%.o,$(S_HOMA_SRCS))
107 | S_HOMA_HDRS := stripped/homa.h \
108 | 		stripped/homa_impl.h \
109 | 		stripped/homa_peer.h \
110 | 		stripped/homa_pool.h \
111 | 		stripped/homa_receiver.h \
112 | 		stripped/homa_rpc.h \
113 | 		stripped/homa_sock.h \
114 | 		stripped/homa_stub.h \
115 | 		stripped/homa_wire.h
116 | stripped/%.c: ../%.c
117 | 	../util/strip.py --alt $< > $@
118 | stripped/%.h: ../%.h
119 | 	../util/strip.py --alt $< > $@
120 | S_TEST_OBJS := $(patsubst %,stripped/%,$(filter-out unit_timetrace.o, $(TEST_OBJS)))
121 | S_OBJS := $(S_HOMA_OBJS) $(S_TEST_OBJS) $(patsubst %,stripped/%,$(OTHER_OBJS))
122 | 
123 | $(S_OBJS): | stripped $(S_HOMA_HDRS)
124 | 
125 | stripped:
126 | 	mkdir -p stripped
127 | 
128 | stripped/%.o: stripped/%.c
129 | 	$(CC) -c $(patsubst -I..,-Istripped,$(CFLAGS)) $< -o $@
130 | stripped/%.o: %.c
131 | 	$(CC) -c $(patsubst -I..,-Istripped,$(CFLAGS)) $< -o $@
132 | stripped/%.o: %.cc
133 | 	$(CXX) -c $(patsubst -I..,-Istripped,$(CCFLAGS)) $< -o $@
134 | 
135 | s_unit: $(S_OBJS)
136 | 	$(CXX) $(CFLAGS) $^ -o $@ -lasan
137 | 
138 | s_test: s_unit
139 | 	./s_unit
140 | 
141 | CLEANS += s_unit
142 | 
143 | # The target below shouldn't be needed: theoretically, any code that is
144 | # sensitive to IPv4 vs. IPv6 should be tested explicitly, regardless of
145 | # the --ipv4 argument.
146 | test_both: unit
147 | 	@echo "Testing with IPv4 default:"
148 | 	./unit --ipv4
149 | 	@echo "Testing with IPv6 default:"
150 | 	./unit
151 | 
152 | clean:
153 | 	rm -f $(CLEANS)
154 | 	rm -rf stripped
155 | 
156 | # This magic (along with the -MD gcc option) automatically generates makefile
157 | # dependencies for header files included from C source files we compile,
158 | # and keeps those dependencies up-to-date every time we recompile.
159 | # See 'mergedep.pl' for more information.
160 | .deps: $(wildcard *.d stripped/*.d)
161 | 	@mkdir -p $(@D)
162 | 	$(PERL) mergedep.pl $@ $^
163 | -include .deps
164 | 
165 | # The following target is useful for debugging Makefiles; it
166 | # prints the value of a make variable.
167 | print-%:
168 | 	@echo $* = $($*)
169 | 


--------------------------------------------------------------------------------
/test/README.md:
--------------------------------------------------------------------------------
 1 | This directory contains unit tests for the Homa Linux kernel module.
 2 | Here are a few overall notes:
 3 | 
 4 | * These are "white box" tests, not "black box" tests. Tests are written
 5 |   by looking at the code and writing enough tests to make sure all of the
 6 |   major code elements are covered.
 7 | 
 8 | * The structure of the unit tests is isomorphic to the structure of the
 9 |   code:
10 |   * There is one test file in this directory for each code file. For example,
11 |     `unit_homa_incoming.c` contains unit tests for `../homa_incoming.c`.
12 |   * Within the test file, there is a block of tests for each function in the
13 |     corresponding code file, and the test blocks occur in the same order
14 |     as the functions. If you move functions around, move the tests around
15 |     to maintain isomorphism.
16 |   * The tests for each function are ordered according to which lines of code
17 |     in the function they test. Typically, a given test will test one or a few
18 |     lines of the function. The order of the tests matches the order of the
19 |     code ranges they test. With this approach, it's easy to scan the tests
20 |     for a function after you make changes to see if you need to add more
21 |     tests.
22 |   * Some functions will have an initial test labeled "basic" or "sanity check".
23 |     These initial tests may exercise a variety of features in the function;
24 |     remaining tests only need to cover things not exercised by the initial
25 |     test.
26 | 
27 | * The name of a test indicates what function it is testing, and also gives
28 |   a very terse synopsis of what is being tested. For example, consider this
29 |   test from `unit_homa_incoming.c`:
30 |   ```
31 |   TEST_F(homa_incoming, homa_add_packet__packet_overlaps_message_end)
32 |   {
33 |     ...
34 |   }
35 |   ```
36 |   The name of the test is `homa_add_packet__packet_overlaps_message_end`;
37 |   the test exercises the function `homa_add_packet`, and the particular
38 |   case is a new arriving packet that extends past the end of the message.
39 | 
40 | * In general, tests should be disaggregated so that each test only tests a small
41 |   amount of functionality. Avoid large tests that test many different things.
42 | 
43 | * In writing tests, focus on the control structure. For example, there should
44 |   be tests for each branch of an `if` statement. For loops, be sure to
45 |   include tests that involve multiple iterations of the loop.
46 | 
47 | * You don't need to individually test each side effect of a collection of
48 |   straight-line statements; testing one or two of them is fine.
49 | 
50 | * The file `mock.c` mocks out Linux kernel functions invoked by the code
51 |   being tested. Where relevant, the mocking code may record information about
52 |   how it was invoked and/or allow for the injection of errors in results.
53 | 
54 | * It should be possible to exercise virtually every line of code in Homa.
55 |   If it appears that you cannot exercise a particular line, check to see
56 |   whether `mock.c` has mechanisms you can use to get the desired effect.
57 |   If not, consider extending `mock.c` to provide whatever you need.
58 | 
59 | * Feel free to contact John Ousterhout if you're having trouble figuring out
60 |   how to test a particular piece of code.
61 | 


--------------------------------------------------------------------------------
/test/ccutils.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2022 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /* Utility functions for unit tests, implemented in C++. */
 6 | 
 7 | #ifdef __cplusplus
 8 | #define CEXTERN extern "C"
 9 | #else
10 | #define CEXTERN extern
11 | #endif
12 | 
13 | struct unit_hash;
14 | 
15 | CEXTERN void          unit_fill_data(unsigned char *data, int length,
16 | 			int first_value);
17 | CEXTERN void          unit_hash_erase(struct unit_hash *hash, const void *key);
18 | CEXTERN void          unit_hash_free(struct unit_hash *hash);
19 | CEXTERN void         *unit_hash_get(struct unit_hash *hash, const void *key);
20 | CEXTERN struct unit_hash *
21 |                       unit_hash_new(void);
22 | CEXTERN void          unit_hash_set(struct unit_hash *hash, const void *key,
23 | 				void *value);
24 | CEXTERN int           unit_hash_size(struct unit_hash *hash);
25 | CEXTERN void          unit_hook(char *id);
26 | CEXTERN void          unit_hook_clear(void);
27 | CEXTERN void          unit_hook_register(void hook_proc(char *id));
28 | CEXTERN void          unit_log_add_separator(char *sep);
29 | CEXTERN void          unit_log_clear(void);
30 | CEXTERN void          unit_log_data(const char *separator, unsigned char *data,
31 | 				int length);
32 | CEXTERN int           unit_log_empty(void);
33 | CEXTERN const char   *unit_log_get(void);
34 | CEXTERN void          unit_log_printf(const char *separator,
35 | 				const char* format, ...)
36 | 				__attribute__((format(printf, 2, 3)));


--------------------------------------------------------------------------------
/test/main.c:
--------------------------------------------------------------------------------
 1 | // SPDX-License-Identifier: BSD-2-Clause
 2 | 
 3 | /* Main program for running Homa unit tests. */
 4 | 
 5 | #include "homa_impl.h"
 6 | #include "kselftest_harness.h"
 7 | #include "mock.h"
 8 | 
 9 | static char *helpMessage =
10 | 	"This program runs unit tests written in the Linux kernel kselftest style.\n"
11 | 	"    Usage: %s options test_name test_name ...\n"
12 | 	"The following options are supported:\n"
13 | 	"    --help or -h      Print this message\n"
14 | 	"    --ipv4            Simulate IPv4 for all packets (default: use IPv6)\n"
15 | 	"    --verbose or -v   Print the names of all tests as they run (default:\n"
16 | 	"                      print only tests that fail)\n"
17 | 	"If one or more test_name arguments are provided, then only those tests are\n"
18 | 	"run; if no test names are provided, then all tests are run.\n"
19 | 	"\n"
20 | 	"Note: the tests should provide complete coverage of both IPv4 and IPv6 without\n"
21 | 	"using the --ipv4 argument (code that depends on IPv4 vs. IPv6 already has\n"
22 | 	"special test cases for each); --ipv4 is provided for occasional double-checking.\n";
23 | 
24 | int main(int argc, char **argv)
25 | {
26 | 	int verbose = 0;
27 | 	int i;
28 | 
29 | 	mock_ipv6_default = true;
30 | 	for (i = 1; i < argc; i++) {
31 | 		if ((strcmp(argv[i], "-h") == 0) ||
32 | 			(strcmp(argv[i], "--help") == 0)) {
33 | 			printf(helpMessage, argv[0]);
34 | 			return 0;
35 | 		} else if (strcmp(argv[i], "--ipv4") == 0) {
36 | 			mock_ipv6_default = false;
37 | 		} else if ((strcmp(argv[i], "-v") == 0) ||
38 | 			(strcmp(argv[i], "--verbose") == 0)) {
39 | 			verbose = 1;
40 | 		} else if (argv[i][0] == '-') {
41 | 			printf("Unknown option %s; type '%s --help' for help\n",
42 | 				argv[i], argv[0]);
43 | 			return 1;
44 | 		} else
45 | 			break;
46 | 	}
47 | 	test_harness_run(argc-i, argv+i, verbose);
48 | }
49 | 


--------------------------------------------------------------------------------
/test/mergedep.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Copyright 2003 Bryan Ford
 3 | # Distributed under the GNU General Public License.
 4 | #
 5 | # Usage: mergedep <main-depfile> [<new-depfiles> ...]
 6 | #
 7 | # This script merges the contents of all <new-depfiles> specified
 8 | # on the command line into the single file <main-depfile>,
 9 | # which may or may not previously exist.
10 | # Dependencies in the <new-depfiles> will override
11 | # any existing dependencies for the same targets in <main-depfile>.
12 | # The <new-depfiles> are deleted after <main-depfile> is updated.
13 | #
14 | # The <new-depfiles> are typically generated by GCC with the -MD option,
15 | # and the <main-depfile> is typically included from a Makefile,
16 | # as shown here for GNU 'make':
17 | #
18 | #	.deps: $(wildcard *.d)
19 | #		perl mergedep $@ $^
20 | #	-include .deps
21 | #
22 | # This script properly handles multiple dependencies per <new-depfile>,
23 | # including dependencies having no target,
24 | # so it is compatible with GCC3's -MP option.
25 | #
26 | 
27 | sub readdeps {
28 | 	my $filename = shift;
29 | 
30 | 	open(DEPFILE, $filename) or return 0;
31 | 	while (<DEPFILE>) {
32 | 		if (/([^:]*):([^\\:]*)([\\]?)$/) {
33 | 			my $target = $1;
34 | 			my $deplines = $2;
35 | 			my $slash = $3;
36 | 			while ($slash ne '') {
37 | 				$_ = <DEPFILE>;
38 | 				defined($_) or die
39 | 					"Unterminated dependency in $filename";
40 | 				/(^[ \t][^\\]*)([\\]?)$/ or die
41 | 					"Bad continuation line in $filename";
42 | 				$deplines = "$deplines\\\n$1";
43 | 				$slash = $2;
44 | 			}
45 | 			#print "DEPENDENCY [[$target]]: [[$deplines]]\n";
46 | 			$dephash{$target} = $deplines;
47 | 		} elsif (/^[#]?[ \t]*$/) {
48 | 			# ignore blank lines and comments
49 | 		} else {
50 | 			die "Bad dependency line in $filename: $_";
51 | 		}
52 | 	}
53 | 	close DEPFILE;
54 | 	return 1;
55 | }
56 | 
57 | 
58 | if ($#ARGV < 0) {
59 | 	print "Usage: mergedep <main-depfile> [<new-depfiles> ..]\n";
60 | 	exit(1);
61 | }
62 | 
63 | %dephash = ();
64 | 
65 | # Read the main dependency file
66 | $maindeps = $ARGV[0];
67 | readdeps($maindeps);
68 | 
69 | # Read and merge in the new dependency files
70 | foreach $i (1 .. $#ARGV) {
71 | 	readdeps($ARGV[$i]) or die "Can't open $ARGV[$i]";
72 | }
73 | 
74 | # Update the main dependency file
75 | open(DEPFILE, ">$maindeps.tmp") or die "Can't open output file $maindeps.tmp";
76 | foreach $target (keys %dephash) {
77 | 	print DEPFILE "$target:$dephash{$target}";
78 | }
79 | close DEPFILE;
80 | rename("$maindeps.tmp", "$maindeps") or die "Can't overwrite $maindeps";
81 | 
82 | # Finally, delete the new dependency files
83 | foreach $i (1 .. $#ARGV) {
84 | 	unlink($ARGV[$i]) or print "Error removing $ARGV[$i]\n";
85 | }
86 | 
87 | 


--------------------------------------------------------------------------------
/test/mock.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2022 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /* Functions for mocking that are exported to test code. */
 6 | 
 7 | extern int         mock_alloc_page_errors;
 8 | extern int         mock_alloc_skb_errors;
 9 | extern int         mock_bpage_size;
10 | extern int         mock_bpage_shift;
11 | extern int         mock_compound_order_mask;
12 | extern int         mock_copy_data_errors;
13 | extern int         mock_copy_to_user_dont_copy;
14 | extern int         mock_copy_to_user_errors;
15 | extern int         mock_cpu_idle;
16 | extern cycles_t    mock_cycles;
17 | extern int         mock_import_iovec_errors;
18 | extern int         mock_import_ubuf_errors;
19 | extern int         mock_ip6_xmit_errors;
20 | extern int         mock_ip_queue_xmit_errors;
21 | extern bool        mock_ipv6;
22 | extern bool        mock_ipv6_default;
23 | extern int         mock_kmalloc_errors;
24 | extern int         mock_kthread_create_errors;
25 | extern int         mock_register_protosw_errors;
26 | extern char        mock_xmit_prios[];
27 | extern int         mock_log_rcu_sched;
28 | extern int         mock_max_grants;
29 | extern int         mock_max_skb_frags;
30 | extern int         mock_mtu;
31 | extern struct net_device
32 | 		   mock_net_device;
33 | extern __u64       mock_ns;
34 | extern __u64       mock_ns_tick;
35 | extern int         mock_numa_mask;
36 | extern int         mock_page_nid_mask;
37 | extern char        mock_printk_output[];
38 | extern int         mock_route_errors;
39 | extern int         mock_spin_lock_held;
40 | extern struct task_struct
41 | 		   mock_task;
42 | extern int         mock_trylock_errors;
43 | extern int         mock_vmalloc_errors;
44 | extern int         mock_xmit_log_verbose;
45 | extern int         mock_xmit_log_homa_info;
46 | 
47 | struct page *
48 | 		   mock_alloc_pages(gfp_t gfp, unsigned order);
49 | int         mock_check_error(int *errorMask);
50 | void        mock_clear_xmit_prios(void);
51 | void        mock_data_ready(struct sock *sk);
52 | cycles_t    mock_get_cycles(void);
53 | unsigned int
54 | 	    mock_get_mtu(const struct dst_entry *dst);
55 | void        mock_get_page(struct page *page);
56 | int         mock_page_refs(struct page *page);
57 | int         mock_page_refs(struct page *page);
58 | void        mock_put_page(struct page *page);
59 | void        mock_rcu_read_lock(void);
60 | void        mock_rcu_read_unlock(void);
61 | struct ctl_table_header *
62 | 	    mock_register_net_sysctl(struct net *net,
63 | 				     const char *path,
64 | 				     struct ctl_table *table);
65 | void        mock_set_core(int num);
66 | void        mock_set_ipv6(struct homa_sock *hsk);
67 | void        mock_spin_lock(spinlock_t *lock);
68 | void        mock_spin_unlock(spinlock_t *lock);
69 | int         mock_skb_count(void);
70 | struct sk_buff *
71 |             mock_skb_new(struct in6_addr *saddr, struct homa_common_hdr *h,
72 | 			int extra_bytes, int first_value);
73 | void        mock_sock_destroy(struct homa_sock *hsk,
74 | 			      struct homa_socktab *socktab);
75 | void        mock_sock_init(struct homa_sock *hsk, struct homa *homa,
76 | 			   int port);
77 | void        mock_teardown(void);
78 | void       *mock_vmalloc(size_t size);
79 | 


--------------------------------------------------------------------------------
/test/unit_homa_metrics.c:
--------------------------------------------------------------------------------
 1 | // SPDX-License-Identifier: BSD-2-Clause
 2 | 
 3 | #include "homa_impl.h"
 4 | #define KSELFTEST_NOT_MAIN 1
 5 | #include "kselftest_harness.h"
 6 | #include "ccutils.h"
 7 | #include "mock.h"
 8 | #include "utils.h"
 9 | 
10 | FIXTURE(homa_metrics) {
11 | 	struct homa homa;
12 | };
13 | FIXTURE_SETUP(homa_metrics)
14 | {
15 | 	homa_init(&self->homa);
16 | 	global_homa = &self->homa;
17 | }
18 | FIXTURE_TEARDOWN(homa_metrics)
19 | {
20 | 	global_homa = NULL;
21 | 	homa_destroy(&self->homa);
22 | 	unit_teardown();
23 | }
24 | 
25 | TEST_F(homa_metrics, homa_metric_append)
26 | {
27 | 	self->homa.metrics_length = 0;
28 | 	homa_metric_append(&self->homa, "x: %d, y: %d", 10, 20);
29 | 	EXPECT_EQ(12, self->homa.metrics_length);
30 | 	EXPECT_STREQ("x: 10, y: 20", self->homa.metrics);
31 | 
32 | 	homa_metric_append(&self->homa, ", z: %d", 12345);
33 | 	EXPECT_EQ(22, self->homa.metrics_length);
34 | 	EXPECT_STREQ("x: 10, y: 20, z: 12345", self->homa.metrics);
35 | 	EXPECT_EQ(30, self->homa.metrics_capacity);
36 | 
37 | 	homa_metric_append(&self->homa, ", q: %050d", 88);
38 | 	EXPECT_EQ(77, self->homa.metrics_length);
39 | 	EXPECT_STREQ("x: 10, y: 20, z: 12345, q: 00000000000000000000000000000000000000000000000088",
40 | 			self->homa.metrics);
41 | 	EXPECT_EQ(120, self->homa.metrics_capacity);
42 | }
43 | TEST_F(homa_metrics, homa_metrics_open)
44 | {
45 | 	EXPECT_EQ(0, homa_metrics_open(NULL, NULL));
46 | 	EXPECT_NE(NULL, self->homa.metrics);
47 | 
48 | 	strcpy(self->homa.metrics, "12345");
49 | 	EXPECT_EQ(0, homa_metrics_open(NULL, NULL));
50 | 	EXPECT_EQ(5, strlen(self->homa.metrics));
51 | 	EXPECT_EQ(2, self->homa.metrics_active_opens);
52 | }
53 | TEST_F(homa_metrics, homa_metrics_read__basics)
54 | {
55 | 	loff_t offset = 10;
56 | 	char buffer[1000];
57 | 
58 | 	self->homa.metrics = kmalloc(100, GFP_KERNEL);
59 | 	self->homa.metrics_capacity = 100;
60 | 	strcpy(self->homa.metrics, "0123456789abcdefghijklmnop");
61 | 	self->homa.metrics_length = 26;
62 | 	EXPECT_EQ(5, homa_metrics_read(NULL, buffer, 5, &offset));
63 | 	EXPECT_SUBSTR("_copy_to_user copied 5 bytes", unit_log_get());
64 | 	EXPECT_EQ(15, offset);
65 | 
66 | 	unit_log_clear();
67 | 	EXPECT_EQ(11, homa_metrics_read(NULL, buffer, 1000, &offset));
68 | 	EXPECT_SUBSTR("_copy_to_user copied 11 bytes", unit_log_get());
69 | 	EXPECT_EQ(26, offset);
70 | 
71 | 	unit_log_clear();
72 | 	EXPECT_EQ(0, homa_metrics_read(NULL, buffer, 1000, &offset));
73 | 	EXPECT_STREQ("", unit_log_get());
74 | 	EXPECT_EQ(26, offset);
75 | }
76 | TEST_F(homa_metrics, homa_metrics_read__error_copying_to_user)
77 | {
78 | 	loff_t offset = 10;
79 | 	char buffer[1000];
80 | 
81 | 	self->homa.metrics = kmalloc(100, GFP_KERNEL);
82 | 	self->homa.metrics_capacity = 100;
83 | 	strcpy(self->homa.metrics, "0123456789abcdefghijklmnop");
84 | 	self->homa.metrics_length = 26;
85 | 	mock_copy_to_user_errors = 1;
86 | 	EXPECT_EQ(EFAULT, -homa_metrics_read(NULL, buffer, 5, &offset));
87 | }
88 | 
89 | TEST_F(homa_metrics, homa_metrics_release)
90 | {
91 | 	self->homa.metrics_active_opens = 2;
92 | 	EXPECT_EQ(0, homa_metrics_release(NULL, NULL));
93 | 	EXPECT_EQ(1, self->homa.metrics_active_opens);
94 | 
95 | 	EXPECT_EQ(0, homa_metrics_release(NULL, NULL));
96 | 	EXPECT_EQ(0, self->homa.metrics_active_opens);
97 | }
98 | 


--------------------------------------------------------------------------------
/test/unit_homa_utils.c:
--------------------------------------------------------------------------------
  1 | // SPDX-License-Identifier: BSD-2-Clause
  2 | 
  3 | #include "homa_impl.h"
  4 | #define KSELFTEST_NOT_MAIN 1
  5 | #include "kselftest_harness.h"
  6 | #include "ccutils.h"
  7 | #include "mock.h"
  8 | #include "utils.h"
  9 | 
 10 | #define n(x) htons(x)
 11 | #define N(x) htonl(x)
 12 | 
 13 | FIXTURE(homa_utils) {
 14 | 	struct homa homa;
 15 | };
 16 | FIXTURE_SETUP(homa_utils)
 17 | {
 18 | 	homa_init(&self->homa);
 19 | 	unit_log_clear();
 20 | }
 21 | FIXTURE_TEARDOWN(homa_utils)
 22 | {
 23 | 	homa_destroy(&self->homa);
 24 | 	unit_teardown();
 25 | }
 26 | 
 27 | /**
 28 |  * set_cutoffs() - A convenience method to allow all of the values in
 29 |  * homa->unsched_cutoffs to be set concisely.
 30 |  * @homa:   Contains the unsched_cutoffs to be modified.
 31 |  * @c0:     New value for homa->unsched_cutoffs[0]
 32 |  * @c1:     New value for homa->unsched_cutoffs[1]
 33 |  * @c2:     New value for homa->unsched_cutoffs[2]
 34 |  * @c3:     New value for homa->unsched_cutoffs[3]
 35 |  * @c4:     New value for homa->unsched_cutoffs[4]
 36 |  * @c5:     New value for homa->unsched_cutoffs[5]
 37 |  * @c6:     New value for homa->unsched_cutoffs[6]
 38 |  * @c7:     New value for homa->unsched_cutoffs[7]
 39 |  */
 40 | static void set_cutoffs(struct homa *homa, int c0, int c1, int c2,
 41 | 		int c3, int c4, int c5, int c6, int c7)
 42 | {
 43 | 	homa->unsched_cutoffs[0] = c0;
 44 | 	homa->unsched_cutoffs[1] = c1;
 45 | 	homa->unsched_cutoffs[2] = c2;
 46 | 	homa->unsched_cutoffs[3] = c3;
 47 | 	homa->unsched_cutoffs[4] = c4;
 48 | 	homa->unsched_cutoffs[5] = c5;
 49 | 	homa->unsched_cutoffs[6] = c6;
 50 | 	homa->unsched_cutoffs[7] = c7;
 51 | }
 52 | 
 53 | TEST_F(homa_utils, homa_init__kmalloc_failure_for_port_map)
 54 | {
 55 | 	struct homa homa2;
 56 | 
 57 | 	memset(&homa2, 0, sizeof(homa2));
 58 | 	mock_kmalloc_errors = 1;
 59 | 	EXPECT_EQ(ENOMEM, -homa_init(&homa2));
 60 | 	EXPECT_EQ(NULL, homa2.port_map);
 61 | 	homa_destroy(&homa2);
 62 | }
 63 | TEST_F(homa_utils, homa_init__kmalloc_failure_for_peers)
 64 | {
 65 | 	struct homa homa2;
 66 | 
 67 | 	memset(&homa2, 0, sizeof(homa2));
 68 | 	mock_kmalloc_errors = 2;
 69 | 	EXPECT_EQ(ENOMEM, -homa_init(&homa2));
 70 | 	EXPECT_NE(NULL, homa2.port_map);
 71 | 	EXPECT_EQ(NULL, homa2.peers);
 72 | 	homa_destroy(&homa2);
 73 | }
 74 | TEST_F(homa_utils, homa_init__homa_skb_init_failure)
 75 | {
 76 | 	struct homa homa2;
 77 | 
 78 | 	memset(&homa2, 0, sizeof(homa2));
 79 | 	mock_kmalloc_errors = 4;
 80 | 	EXPECT_EQ(ENOMEM, -homa_init(&homa2));
 81 | 	EXPECT_SUBSTR("Couldn't initialize skb management (errno 12)",
 82 | 		      mock_printk_output);
 83 | 	homa_destroy(&homa2);
 84 | }
 85 | TEST_F(homa_utils, homa_init__cant_create_pacer_thread)
 86 | {	/* Error path: homa_init when the pacer kthread can't be created. */
 87 | 	struct homa homa2;
 88 | 
 89 | 	memset(&homa2, 0, sizeof(homa2));
 90 | 	mock_kthread_create_errors = 1; /* presumably fails the 1st creation */
 91 | 	EXPECT_EQ(EACCES, -homa_init(&homa2));
 92 | 	EXPECT_EQ(NULL, homa2.pacer_kthread); /* no thread pointer left behind */
 93 | 	homa_destroy(&homa2);
 94 | }
 95 | 
 96 | TEST_F(homa_utils, homa_print_ipv4_addr)
 97 | {	/* NOTE(review): named ..._ipv4_addr but calls homa_print_ipv6_addr. */
 98 | 	struct in6_addr test_addr1 = unit_get_in_addr("192.168.0.1");
 99 | 	struct in6_addr test_addr2 = unit_get_in_addr("1.2.3.4");
100 | 	struct in6_addr test_addr3 = unit_get_in_addr("5.6.7.8");
101 | 	char *p1, *p2;
102 | 	int i;
103 | 
104 | 	p1 = homa_print_ipv6_addr(&test_addr1);
105 | 	p2 = homa_print_ipv6_addr(&test_addr2);
106 | 	EXPECT_STREQ("192.168.0.1", p1); /* printed in dotted-quad form */
107 | 	EXPECT_STREQ("1.2.3.4", p2); /* p1 and p2 hold different results */
108 | 
109 | 	/* Make sure buffers eventually get reused. */
110 | 	for (i = 0; i < 20; i++)
111 | 		homa_print_ipv6_addr(&test_addr3);
112 | 	EXPECT_STREQ("5.6.7.8", p1); /* p1's buffer was overwritten by a later call */
113 | }
114 | 
115 | TEST_F(homa_utils, homa_snprintf)
116 | {	/* Successive calls append at offset @used; overflow is truncated. */
117 | 	char buffer[50];
118 | 	int used = 0;
119 | 
120 | 	used = homa_snprintf(buffer, sizeof32(buffer), used,
121 | 			"Test message with values: %d and %d", 100, 1000);
122 | 	EXPECT_EQ(38, used); /* length of the text written so far */
123 | 	EXPECT_STREQ("Test message with values: 100 and 1000", buffer);
124 | 
125 | 	used = homa_snprintf(buffer, sizeof32(buffer), used,
126 | 			"; plus: %d", 123456);
127 | 	EXPECT_EQ(49, used); /* capped at sizeof(buffer) - 1 */
128 | 	EXPECT_STREQ("Test message with values: 100 and 1000; plus: 123",
129 | 			buffer);
130 | 
131 | 	used = homa_snprintf(buffer, sizeof32(buffer), used,
132 | 			"more text, none of which fits");
133 | 	EXPECT_EQ(49, used); /* buffer already full: nothing is appended */
134 | 	EXPECT_STREQ("Test message with values: 100 and 1000; plus: 123",
135 | 			buffer);
136 | }
137 | 
138 | TEST_F(homa_utils, homa_prios_changed__basics)
139 | {	/* Checks the cutoffs and derived fields after homa_prios_changed. */
140 | 	set_cutoffs(&self->homa, 90, 80, HOMA_MAX_MESSAGE_LENGTH*2, 60, 50,
141 | 			40, 30, 0);
142 | 	self->homa.num_priorities = 6;
143 | 	homa_prios_changed(&self->homa);
144 | 	EXPECT_EQ(0, self->homa.unsched_cutoffs[6]); /* was 30; index == num_priorities */
145 | 	EXPECT_EQ(40, self->homa.unsched_cutoffs[5]); /* unchanged */
146 | 	EXPECT_EQ(60, self->homa.unsched_cutoffs[3]); /* unchanged */
147 | 	EXPECT_EQ(HOMA_MAX_MESSAGE_LENGTH*2, self->homa.unsched_cutoffs[2]);
148 | 	EXPECT_EQ(80, self->homa.unsched_cutoffs[1]); /* unchanged */
149 | 	EXPECT_EQ(INT_MAX, self->homa.unsched_cutoffs[0]); /* was 90 */
150 | 	EXPECT_EQ(1, self->homa.max_sched_prio);
151 | 	EXPECT_EQ(1, self->homa.cutoff_version); /* NOTE(review): presumably 0 before the call */
152 | }
153 | TEST_F(homa_utils, homa_prios_changed__num_priorities_too_large)
154 | {	/* An oversized num_priorities is expected to be reduced to 8. */
155 | 	self->homa.num_priorities = 100;
156 | 	homa_prios_changed(&self->homa);
157 | 	EXPECT_EQ(8, self->homa.num_priorities);
158 | }
159 | TEST_F(homa_utils, homa_prios_changed__share_lowest_priority)
160 | {	/* Presumably: no level is left for scheduled packets only (confirm). */
161 | 	set_cutoffs(&self->homa, 90, 80, 70, 60, 50, 40, 30, 0);
162 | 	self->homa.num_priorities = 7;
163 | 	homa_prios_changed(&self->homa);
164 | 	EXPECT_EQ(30, self->homa.unsched_cutoffs[6]); /* unchanged */
165 | 	EXPECT_EQ(80, self->homa.unsched_cutoffs[1]); /* unchanged */
166 | 	EXPECT_EQ(0x7fffffff, self->homa.unsched_cutoffs[0]); /* was 90 */
167 | 	EXPECT_EQ(0, self->homa.max_sched_prio);
168 | }
169 | 


--------------------------------------------------------------------------------
/test/utils.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-License-Identifier: BSD-2-Clause */
 2 | 
 3 | /* Utility functions for unit tests, implemented in C. */
 4 | 
 5 | struct homa_message_out;
 6 | struct homa_rpc;
 7 | struct unit_hash;
 8 | 
 9 | /**
10 |  * define UNIT_TEST_DATA_PER_PACKET - bytes of payload to use as the
11 |  * default for packets sent in unit tests.
12 |  */
13 | #define UNIT_TEST_DATA_PER_PACKET 1400
14 | 
15 | /**
16 |  * enum unit_rpc_state - used as the @state argument to unit_client_rpc
17 |  * and unit_server_rpc.
18 |  * UNIT_OUTGOING -      RPC state is RPC_OUTGOING, no packets have been sent.
19 |  * UNIT_RCVD_ONE_PKT -  RPC state is RPC_INCOMING, a single packet has
20 |  *                      been received.
21 |  * UNIT_RCVD_MSG -      RPC state is RPC_INCOMING, the entire message has
22 |  *                      been received.
23 |  * UNIT_IN_SERVICE -    RPC state is RPC_IN_SERVICE (only valid for
24 |  *                      unit_server_rpc).
25 |  */
26 | enum unit_rpc_state {
27 | 	UNIT_OUTGOING       = 21,	/* NOTE(review): values start at 21, presumably to stay distinct from the RPC_* states; confirm */
28 | 	UNIT_RCVD_ONE_PKT   = 22,
29 | 	UNIT_RCVD_MSG       = 23,
30 | 	UNIT_IN_SERVICE     = 24,	/* only accepted by unit_server_rpc */
31 | };
32 | 
33 | extern char         *unit_ack_string(struct homa_ack *ack);
34 | extern struct homa_rpc
35 | 		*unit_client_rpc(struct homa_sock *hsk,
36 | 			enum unit_rpc_state state, struct in6_addr *client_ip,
37 | 			struct in6_addr *server_ip, int server_port, int id,
38 | 			int req_length, int resp_length);
39 | extern struct in6_addr
40 | 		     unit_get_in_addr(char *s);
41 | extern void          unit_homa_destroy(struct homa *homa);
42 | extern struct iov_iter
43 | 		    *unit_iov_iter(void *buffer, size_t length);
44 | extern int           unit_list_length(struct list_head *head);
45 | extern void          unit_log_active_ids(struct homa_sock *hsk);
46 | extern void          unit_log_filled_skbs(struct sk_buff *skb, int verbose);
47 | extern void          unit_log_frag_list(struct sk_buff *skb, int verbose);
48 | extern void          unit_log_grantables(struct homa *homa);
49 | extern void          unit_log_hashed_rpcs(struct homa_sock *hsk);
50 | extern void          unit_log_message_out_packets(
51 | 			struct homa_message_out *message, int verbose);
52 | extern const char   *unit_print_gaps(struct homa_rpc *rpc);
53 | extern struct homa_rpc
54 | 		    *unit_server_rpc(struct homa_sock *hsk,
55 | 			enum unit_rpc_state state, struct in6_addr *server_ip,
56 | 			struct in6_addr *client_ip, int client_port, int id,
57 | 			int req_length, int resp_length);
58 | extern void          unit_log_skb_list(struct sk_buff_head *packets,
59 | 			int verbose);
60 | extern void          unit_log_throttled(struct homa *homa);
61 | extern void          unit_teardown(void);
62 | 
63 | /* Kludge to avoid including arpa/inet.h, which causes definition
64 |  * conflicts with kernel header files.
65 |  */
66 | extern int inet_pton(int af, const char *src, void *dst);
67 | 


--------------------------------------------------------------------------------
/util/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile to build various testing programs for Homa.
 2 | 
 3 | CFLAGS := -Wall -Werror -fno-strict-aliasing -O3 -I..
 4 | 
 5 | BINS := buffer_client buffer_server cp_node dist_test dist_to_proto \
 6 | 	get_time_trace homa_prio homa_test inc_tput receive_raw scratch \
 7 | 	send_raw server smi test_time_trace use_memory
 8 | 
 9 | OBJS := $(patsubst %,%.o,$(BINS))
10 | 
11 | LIB_SRCS := dist.cc homa_api.c test_utils.cc time_trace.cc
12 | LIB_OBJS := $(patsubst %.c,%.o,$(patsubst %.cc,%.o,$(LIB_SRCS)))
13 | LIB_OBJS += homa_receiver.o
14 | 
15 | HDRS = ../homa_receiver.h ../homa.h dist.h time_trace.h
16 | 
17 | .SECONDARY: $(OBJS) $(LIB_OBJS)
18 | .PHONY: all clean
19 | 
20 | all: $(BINS)
21 | 
22 | cp_node: cp_node.o dist.o time_trace.o $(LIB_OBJS)
23 | 	$(CXX) $(CFLAGS) $^ -lpthread -o $@
24 | 
25 | $(OBJS) $(LIB_OBJS): $(HDRS)
26 | 
27 | homa_receiver.o: ../homa_receiver.cc ../homa_receiver.h
28 | 	$(CXX) -c $(CFLAGS) -std=c++17 $< -o $@
29 | 
30 | # This seems to be the only way to disable the built-in implicit rules
31 | # for %:%.c and %:%.cc.
32 | .SUFFIXES:
33 | 
34 | %: %.o $(LIB_OBJS)
35 | 	$(CXX) $(CFLAGS) $^ -lpthread -o $@
36 | 
37 | %.o: %.cc test_utils.h ../homa.h
38 | 	$(CXX) -c $(CFLAGS) -std=c++17 $< -o $@
39 | 
40 | %.o: %.c test_utils.h ../homa.h
41 | 	$(CC) -c $(CFLAGS) $< -o $@
42 | 
43 | homa_api.o: ../homa_api.c ../homa.h
44 | 	$(CC) -c $(CFLAGS) $< -o $@
45 | 
46 | clean:
47 | 	rm -f $(BINS) $(OBJS) $(LIB_OBJS)
48 | 
49 | # The following target is useful for debugging Makefiles; it
50 | # prints the value of a make variable.
51 | print-%:
52 | 	@echo $* = $($*)


--------------------------------------------------------------------------------
/util/README.md:
--------------------------------------------------------------------------------
 1 | This directory contains a collection of utilities for testing and
 2 | analyzing Homa. Here is a summary of some of the programs in this
 3 | directory; for more information, run any program with the "--help"
 4 | option, or look at its source code.
 5 | 
 6 | ### Cluster Performance Tests
 7 | 
 8 | **cp_node**: a program that runs on an individual node as part of cluster
 9 | benchmarks. You can run this program by hand (e.g. on one client machine
10 | and one server machine): type `cp_node --help` for basic documentation.
11 | This program is also run automatically by the other cp_* benchmarks.
12 | 
13 | **cp_vs_tcp**: the primary cluster performance test. Measures slowdown
14 | as a function of message size for Homa and TCP under various workloads.
15 | 
16 | **cp_basic**: measures basic latency and throughput for Homa and TCP.
17 | 
18 | **cp_client_threads**: measures the throughput of a single client as a
19 | function of the number of sending threads.
20 | 
21 | **cp_config**: measures Homa slowdown while varying one or more
22 | configuration parameters.
23 | 
24 | **cp_load**: generates CDFs of short message latency for Homa and
25 | TCP under different network loads.
26 | 
27 | **cp_mtu**: generates CDFs of short message latency for Homa and TCP
28 | while varying the maximum packet length.
29 | 
30 | **cp_server_ports**: measures single-server throughput as a function
31 | of the number of receiving ports.
32 | 
33 | **cp_tcp**: measures the performance of TCP by itself, with no message
34 | truncation.
35 | 
36 | ### Timetracing Tools
37 | A number of programs are available for collecting, transforming, and analyzing
38 | timetraces. Most have --help options that provide documentation. The following
39 | scripts are relatively general-purpose (i.e. they don't have Homa dependencies):
40 | 
41 | **ttgrep.py**: extracts records from a timetrace that match a pattern and
42 | recomputes the time differences using only those records.
43 | 
44 | **ttmerge.py**: combines two or more timetraces into a single timetrace.
45 | 
46 | **ttoffset.py**: offsets all of the times in a timetrace by a given amount (usually
47 | done to line up times in one trace with times in another).
48 | 
49 | **ttrange.py**: extracts timetrace entries from a given time range.
50 | 
51 | **ttsum.py**: outputs statistics from a timetrace on the delay preceding each
52 | event. Can also produce a timeline for repeated operations such as processing
53 | a request on a server.
54 | 
55 | The following scripts are Homa-specific:
56 | 
57 | **ttprint.py**: extracts the most recent timetrace from the kernel and
58 | prints it to standard output.
59 | 
60 | **ttsync.py**: analyzes Homa-specific information in a collection of
61 | timetraces collected simultaneously on different nodes and rewrites the
62 | traces to synchronize their clocks.
63 | 
64 | **tthoma.py**: this is the primary script for analyzing Homa data. It
65 | contains multiple analyzers that extract different kinds of data from a
66 | collection of timetraces. Invoke with --help for full documentation.
67 | 
68 | ### Other Useful Tools
69 | 
70 | **diff_rtts.py**: compares two .rtts files collected by the cperf benchmarks,
71 | tries to identify how/why they are different.


--------------------------------------------------------------------------------
/util/avg.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | Reads lines and extracts the first floating-point number to appear on
 5 | each line; prints both the individual values and the average of them.
 6 | Usage: avg.py [file]
 7 | """
 8 | 
 9 | from __future__ import division, print_function
10 | from glob import glob
11 | from optparse import OptionParser
12 | import math
13 | import os
14 | import re
15 | import string
16 | import sys
17 | 
18 | if len(sys.argv) == 2:
19 |     f = open(sys.argv[1])
20 | elif len(sys.argv) == 1:
21 |     f = sys.stdin
22 | else:
23 |     print("Usage: %s [tt_file]" % (sys.argv[0]))
24 |     sys.exit(1)
25 | 
26 | values = []
27 | 
28 | for line in f:
29 |     match = re.match('.*?[^0-9]([0-9]+[.][0-9]+)', line)
30 |     if match:
31 |         print('Found field %s' % (match.group(1)))
32 |         values.append(float(match.group(1)))
33 |     else:
34 |         print('Line didn\'t match: %s' % (line))
35 | 
36 | if len(values):
37 |     print('Average: %.3f' % (sum(values)/len(values)))
38 | else:
39 |     print('No lines matched')


--------------------------------------------------------------------------------
/util/buffer_client.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2022 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /* This is a test program used together with buffer_server.c to learn about
 6 |  * how TCP handles buffer exhaustion. This program opens an infinite series
 7 |  * of sockets to a single port and writes as much data to each socket as
 8 |  * it can before the socket backs up (it assumes that the server application
 9 |  * is not reading any of the data). Once each socket backs up, it goes on
10 |  * to the next socket.
11 |  *
12 |  * Usage:
13 |  * buffer_client hostName port
14 |  */
15 | 
16 | #include <errno.h>
17 | #include <netdb.h>
18 | #include <stdio.h>
19 | #include <string.h>
20 | #include <stdlib.h>
21 | #include <unistd.h>
22 | #include <sys/types.h>
23 | #include <sys/socket.h>
24 | 
25 | int main(int argc, char** argv) {
26 | 	int fd, status, port;
27 | 	struct addrinfo *result;
28 | 	struct addrinfo hints;
29 | 	char *host;
30 | #define BUFFER_SIZE 4096
31 | 	char buffer[BUFFER_SIZE] = {0};	/* zeroed: don't send stack garbage */
32 | 	int bytesSent;
33 | 
34 | 	if (argc < 3) {
35 | 		printf("Usage: %s hostName port\n", argv[0]);
36 | 		exit(1);
37 | 	}
38 | 	host = argv[1];
39 | 	port = strtol(argv[2], NULL, 10);
40 | 	if (port == 0) {
41 | 		printf("Bad port number %s; must be integer\n",
42 | 				argv[2]);
43 | 		exit(1);
44 | 	}
45 | 
46 | 	memset(&hints, 0, sizeof(struct addrinfo));
47 | 	hints.ai_family = AF_INET;
48 | 	hints.ai_socktype = SOCK_STREAM;
49 | 	status = getaddrinfo(host, argv[2], &hints, &result);
50 | 	if (status != 0) {
51 | 		printf("Couldn't look up address for %s: %s\n",
52 | 				host, gai_strerror(status));
53 | 		exit(1);
54 | 	}
55 | 
56 | 	while (1) {	/* one socket per iteration; filled sockets stay open */
57 | 		fd = socket(PF_INET, SOCK_STREAM, 0);
58 | 		if (fd < 0) {
59 | 			printf("Couldn't create socket: %s\n", strerror(errno));
60 | 			exit(1);
61 | 		}
62 | 		status = connect(fd, result->ai_addr, result->ai_addrlen);
63 | 		if (status < 0) {
64 | 			close(fd);
65 | 			fd = -1;
66 | 			printf("Couldn't connect to %s:%d: %s\n", host, port,
67 | 					strerror(errno));
68 | 			sleep(5);	/* back off before retrying */
69 | 			continue;
70 | 		}
71 | 
72 | 		bytesSent = 0;
73 | 		while (1) {	/* write until the socket stops accepting data */
74 | 			status = send(fd, buffer, BUFFER_SIZE,
75 | 				MSG_NOSIGNAL|MSG_DONTWAIT);
76 | 			if (status > 0) {
77 | 				bytesSent += status;
78 | 				continue;
79 | 			}
80 | 			if (status == 0) {
81 | 				printf("Fd %d got 0 status after sending %d bytes\n",
82 | 						fd, bytesSent);
83 | 			} else if (errno == EAGAIN) {
84 | 				printf("Fd %d blocked after sending %d bytes\n",
85 | 						fd, bytesSent);
86 | 			} else {
87 | 				printf("Fd %d failed after sending %d "
88 | 						"bytes: %s (%d)\n",
89 | 						fd, bytesSent, strerror(errno),
90 | 						errno);
91 | 			}
92 | 			break;
93 | 		}
94 | 	}
95 | 	exit(0);	/* not reached: the loop above never terminates */
96 | }
97 | 
98 | 


--------------------------------------------------------------------------------
/util/buffer_server.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2022 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /* This is a test program used together with buffer_client.c to learn about
 6 |  * how TCP handles buffer exhaustion. This program accepts connections
 7 |  * on a given port, but it never reads any incoming data, so buffers will
 8 |  * pile up in the kernel.
 9 |  *
10 |  * Usage:
11 |  * buffer_server port
12 |  */
13 | 
14 | #include <errno.h>
15 | #include <netdb.h>
16 | #include <stdio.h>
17 | #include <string.h>
18 | #include <stdlib.h>
19 | #include <unistd.h>
20 | #include <sys/types.h>
21 | #include <sys/socket.h>
22 | 
23 | #include "homa.h"
24 | #include "test_utils.h"
25 | 
26 | int main(int argc, char** argv) {
27 | 	int fd, port;
28 | 	int optval = 1;
29 | 	union sockaddr_in_union bindAddress;
30 | 
31 | 	if (argc < 2) {
32 | 		printf("Usage: %s port\n", argv[0]);
33 | 		exit(1);
34 | 	}
35 | 	port = strtol(argv[1], NULL, 10);
36 | 	if (port == 0) {
37 | 		printf("Bad port number %s; must be integer\n",
38 | 				argv[1]);
39 | 		exit(1);
40 | 	}
41 | 
42 | 	fd = socket(PF_INET, SOCK_STREAM, 0);
43 | 	if (fd < 0) {
44 | 		printf("Couldn't create socket: %s\n", strerror(errno));
45 | 		exit(1);
46 | 	}
47 | 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval,
48 | 			sizeof(optval)) != 0) {
49 | 		printf("Couldn't set SO_REUSEADDR: %s\n", strerror(errno));
50 | 		exit(1);
51 | 	}
52 | 	memset(&bindAddress, 0, sizeof(bindAddress));	/* no stale stack bytes */
53 | 	bindAddress.in4.sin_family = AF_INET;
54 | 	bindAddress.in4.sin_port = htons(port);
55 | 	bindAddress.in4.sin_addr.s_addr = htonl(INADDR_ANY);
56 | 	if (bind(fd, &bindAddress.sa, sizeof(bindAddress.in4)) != 0) {
57 | 		printf("Couldn't bind to port %d: %s\n", port, strerror(errno));
58 | 		exit(1);
59 | 	}
60 | 	if (listen(fd, 1000000) != 0) {
61 | 		printf("Listen failed on socket: %s\n", strerror(errno));
62 | 		exit(1);
63 | 	}
64 | 
65 | 	while (1) {	/* accept forever; accepted sockets are never read */
66 | 		int peerFd;
67 | 		peerFd = accept(fd, NULL, NULL);
68 | 		if (peerFd < 0) {
69 | 			printf("Accept failed: %s\n", strerror(errno));
70 | 		} else {
71 | 			printf("Accepted connection on fd %d\n", peerFd);
72 | 		}
73 | 	}
74 | 	exit(0);	/* not reached: the loop above never terminates */
75 | }
76 | 
77 | 


--------------------------------------------------------------------------------
/util/cp_both:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2024 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # This cperf benchmark runs both TCP and Homa on each client and server
 7 | # node in order to measure interference between the protocols.
 8 | # Type "cp_both --help" for documentation.
 9 | 
10 | from cperf import *
11 | 
12 | for option in ['client_max', 'client_ports', 'port_threads', 'server_ports',
13 |         'tcp_client_ports', 'tcp_server_ports']:
14 |     default_defaults[option] = (default_defaults[option]+1)/2
15 | parser = get_parser(description=
16 |         'Measures slowdown when TCP and Homa are competing for resources '
17 |         'on the same nodes.',
18 |         usage='%(prog)s [options]', defaults={'homa_gbps': 0})
19 | parser.add_argument('--homa-gbps', type=float, dest='homa_gbps',
20 |         metavar='B', default=None,
21 |         help='Configure Homa to generate B Gbps of total outgoing bandwidth '
22 |         'on each node (clients and servers combined); the remainder of '
23 |         '--gbps will be generated by TCP (default: split --gbps between '
24 |         'Homa and TCP)')
25 | default_defaults['client_max']
26 | options = parser.parse_args()
27 | init(options)
28 | 
29 | # First, run the experiment
30 | if not options.plot_only:
31 |     homa_options = copy.deepcopy(options)
32 |     homa_options.name = "homa_" + options.workload
33 |     homa_options.protocol = "homa"
34 | 
35 |     tcp_options = copy.deepcopy(options)
36 |     tcp_options.name = "tcp_" + options.workload
37 |     tcp_options.protocol = "tcp"
38 | 
39 |     if options.homa_gbps == None:
40 |         options.homa_gbps = options.gbps/2.0
41 |     tcp_options.gbps = (options.gbps - options.homa_gbps)/2
42 |     if tcp_options.gbps < 0:
43 |         tcp_options.gbps = 0
44 |     homa_options.gbps = options.gbps/2 - tcp_options.gbps
45 |     try:
46 |         run_experiments(homa_options, tcp_options)
47 |     except Exception as e:
48 |         log(traceback.format_exc())
49 |     log("Stopping nodes")
50 |     stop_nodes()
51 |     scan_logs()
52 | 
53 | # Generate plots and reports
54 | homa_exp = "homa_" + options.workload
55 | scan_metrics(homa_exp)
56 | tcp_exp = "tcp_" + options.workload
57 | scan_metrics(tcp_exp)
58 | 
59 | # Generate slowdown plot.
60 | log("Generating slowdown plot for %s" % (options.workload))
61 | title = "TCP (%.1f Gbps) and Homa (%.1f Gbps) together, %s %d nodes" % (
62 |         options.gbps - options.homa_gbps, options.homa_gbps,
63 |         options.workload.capitalize(), options.num_nodes)
64 | ax = start_slowdown_plot(title, 1000, homa_exp)
65 | plot_slowdown(ax, tcp_exp, "p99", "TCP P99", color=tcp_color)
66 | plot_slowdown(ax, tcp_exp, "p50", "TCP P50", color=tcp_color2)
67 | plot_slowdown(ax, homa_exp, "p99", "Homa P99", color=homa_color)
68 | plot_slowdown(ax, homa_exp, "p50", "Homa P50", color=homa_color2)
69 | ax.legend(loc="upper right", prop={'size': 9})
70 | plt.tight_layout()
71 | plt.savefig("%s/reports/both_%s.pdf" % (options.log_dir, options.workload))
72 | 
73 | # Generate CDF of small-message RTTs.
74 | log("Generating short message CDF for %s" % (options.workload))
75 | homa_x, homa_y = get_short_cdf(homa_exp)
76 | tcp_x, tcp_y = get_short_cdf(tcp_exp)
77 | start_cdf_plot(title, 10, 0.99e05, 1e-05, "RTT (usecs)",
78 |         "Cumulative Fraction Short Messages")
79 | plt.plot(tcp_x, tcp_y, label="TCP", color=tcp_color)
80 | plt.plot(homa_x, homa_y, label="Homa", color=homa_color)
81 | plt.legend(loc="upper right", prop={'size': 9})
82 | plt.savefig("%s/reports/short_cdf_%s.pdf" % (options.log_dir,
83 |         options.workload))
84 | 


--------------------------------------------------------------------------------
/util/cp_client_threads:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | # Copyright (c) 2020-2023 Homa Developers
  4 | # SPDX-License-Identifier: BSD-1-Clause
  5 | 
  6 | # This cperf benchmark measures the throughput of a single client as a
  7 | # function of the number of sending threads
  8 | 
  9 | from cperf import *
 10 | 
 11 | parser = get_parser(description=
 12 |         'Measures throughput of a single client as a function of the number '
 13 |         ' of sending threads.',
 14 |         usage='%(prog)s [options]',
 15 |         defaults={
 16 |             "server_ports":   6,
 17 |             "port_threads":   3})
 18 | options = parser.parse_args()
 19 | options.no_rtt_files = True
 20 | init(options)
 21 | if options.num_nodes < 2:
 22 |     print("--num_nodes too small (%d): must be at least 2"
 23 |             % (options.num_nodes))
 24 |     sys.exit(-1)
 25 | dir = "%s/reports" % (options.log_dir)
 26 | if not os.path.exists(dir):
 27 |     os.makedirs(dir)
 28 | 
 29 | options.server_nodes = options.num_nodes - 1
 30 | options.first_server = 1
 31 | options.port_receivers = 1
 32 | options.no_rtt_files = True
 33 | options.gbps = 0.0
 34 | workloads = ["w1", "w2", "w3", "w4", "w5"]
 35 | port_range = range(1,11)
 36 | 
 37 | # Run the experiments unless options.plot_only is set
 38 | if not options.plot_only:
 39 |     start_servers(range(1, options.num_nodes), options)  # servers on nodes 1..N-1
 40 |     for workload in workloads:
 41 |         for ports in port_range:
 42 |             exp = "%s_%s_%d" % (options.protocol, workload, ports)  # same name is rebuilt when parsing logs
 43 |             options.workload = workload
 44 |             options.client_ports = ports
 45 |             options.tcp_client_ports = ports
 46 |             run_experiment(exp, range(0,1), options)  # node 0 is the only client
 47 |     log("Stopping nodes")
 48 |     stop_nodes()
 49 | 
 50 | # Parse the log files to extract useful data
 51 | experiments = {}
 52 | scan_log(options.log_dir + "/node0.log", "node0", experiments)
 53 | 
 54 | # Keys are workload names, values are lists of throughputs for each
 55 | # number of ports
 56 | tput = {}
 57 | 
 58 | for workload in workloads:
 59 |     tput[workload] = []
 60 |     for ports in port_range:
 61 |         exp = "%s_%s_%d" % (options.protocol, workload, ports)
 62 |         node = experiments[exp]["node0"]
 63 |         readings = node["client_kops"]
 64 |         if len(readings) == 0:
 65 |             raise Error("No client RPC throughput found for experiment %s"
 66 |                     % (exp))
 67 |         tput[workload].append(sum(readings)/len(readings))
 68 | 
 69 | # print(tput)
 70 | fig, (ax1, ax2) = plt.subplots(2, figsize=[4, 5])  # two stacked plots
 71 | fig.suptitle("%s Single-Client Throughput" % (options.protocol.capitalize()),
 72 |         y=0.95)
 73 | plt.rcParams.update({'font.size': 10})
 74 | ax1.set_ylim(0, 2000)
 75 | ax2.set_ylim(0, 60)
 76 | for axis in [ax1, ax2]:
 77 |     axis.get_xaxis().set_tick_params(direction='in')
 78 |     axis.get_yaxis().set_tick_params(direction='in')
 79 |     axis.set_xlim(0, port_range[-1])
 80 |     top = axis.twiny()  # unlabeled twin axis: inward ticks along the top edge
 81 |     top.set_xlim(0, port_range[-1])
 82 |     top.set_xticklabels([])
 83 |     top.get_xaxis().set_tick_params(direction='in')
 84 | 
 85 |     axis.set_ylabel("Kops/second")
 86 |     right = axis.twinx()  # unlabeled twin axis: inward ticks along the right edge
 87 |     right.set_ylim(0, axis.get_ylim()[1])
 88 |     right.set_yticklabels([])
 89 |     right.get_yaxis().set_tick_params(direction='in')
 90 | ax1.grid(axis='y', which='major', linestyle='dotted')
 91 | ax2.set_xlabel("Sending threads")
 92 | ax2.grid(axis='y', which='major', linestyle='dotted')
 93 | colors = ['#9467bd', '#d62728', '#2ca02c', '#ff7f0e', '#1f77b4']  # consumed from the end by pop()
 94 | for workload in workloads:
 95 |     if (workload == "w4") or (workload == "w5"):  # w4/w5 use the lower plot (y limit 60)
 96 |         ax2.plot(port_range, tput[workload], label=workload, color=colors.pop())
 97 |     else:
 98 |         ax1.plot(port_range, tput[workload], label=workload, color=colors.pop())
 99 | 
100 | ax1.legend(loc="upper left", prop={'size': 9})
101 | ax2.legend(loc="upper left", prop={'size': 9})
102 | # plt.tight_layout()
103 | plt.savefig("%s/reports/%s_client_tput.pdf" % (options.log_dir, options.protocol))


--------------------------------------------------------------------------------
/util/cp_mtu:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2020-2022 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # This cperf benchmark generates CDFs of short-message latency for Homa
 7 | # and TCP under different values for MTU (maximum packet size).
 8 | # Type "cp_mtu --help" for documentation.
 9 | 
10 | from cperf import *
11 | 
12 | parser = get_parser(description=
13 |         'Generates small-message latency CDFs as a function of MTU for '
14 |         'Homa and TCP.',
15 |         usage='%(prog)s [options]'})
16 | parser.add_argument('-w', '--workload', dest='workload',
17 |         metavar='W', required = True,
18 |         help='Workload to use for benchmark: w1-w5 or number')
19 | options = parser.parse_args()
20 | init(options)
21 | options.gbps = options.gbps/2.0
22 | servers = range(0, options.num_nodes)
23 | clients = range(0, options.num_nodes)
24 | mtus = [1500, 3000, 6000, 9000]
25 | 
26 | # Run the experiments (skipped when options.plot_only is set)
27 | if not options.plot_only:
28 |     try:
29 |         do_ssh(["config", "mtu", "1500"], range(0, options.num_nodes))  # MTU for the unloaded run
30 |         options.protocol = "homa"
31 |         start_servers(servers, options)
32 | 
33 |         o = copy.deepcopy(options)  # private option set for the unloaded run
34 |         o.gbps = 0.0
35 |         o.client_ports = 1
36 |         o.client_max = 1
37 |         o.server_ports = 1
38 |         o.server_nodes = 1
39 |         o.first_server = 1
40 |         o.unloaded = 500
41 |         run_experiment("unloaded_%s" % (options.workload), range(0, 1), o)  # node 0 is the client
42 | 
43 |         for mtu in mtus:  # Homa at each MTU
44 |             do_ssh(["config", "mtu", str(mtu)], range(0, options.num_nodes))
45 |             start_servers(servers, options)
46 |             run_experiment("homa_%s_mtu%d" % (options.workload, mtu),
47 |                     clients, options)
48 | 
49 |         options.protocol = "tcp"
50 |         for mtu in mtus:  # TCP at each MTU
51 |             do_ssh(["config", "mtu", str(mtu)], range(0, options.num_nodes))
52 |             start_servers(servers, options)
53 |             run_experiment("tcp_%s_mtu%d" % (options.workload, mtu),
54 |                     clients, options)
55 | 
56 |         do_ssh(["config", "mtu", "1500"], range(0, options.num_nodes))  # leave all nodes at MTU 1500
57 |     except Exception as e:
58 |         log(traceback.format_exc())  # log the failure but still stop the nodes
59 | 
60 |     log("Stopping nodes")
61 |     stop_nodes()
62 |     scan_logs()
63 | 
64 | # Generate plots and reports
65 | unloaded_exp = "unloaded_%s" % (options.workload)
66 | set_unloaded(unloaded_exp)
67 | 
68 | # Generate CDF of small-message RTTs.
69 | log("Generating short message CDFs")
70 | title = "%s %d nodes" % (options.workload.capitalize(), options.num_nodes)
71 | start_cdf_plot(title, 10, 0.99e05, 1e-05, "RTT (usecs)",
72 |         "Cumulative Fraction of Short Messages")
73 | 
74 | set_unloaded("unloaded_%s" % (options.workload))  # NOTE(review): repeats the set_unloaded call made above; likely redundant
75 | styles = [(0, (1, 1)), (0, (2, 2)), (0, (5, 2)), "solid"]  # consumed from the end by pop(): MTU 1500 gets "solid"
76 | for mtu in mtus:
77 |     x, y = get_short_cdf("tcp_%s_mtu%d" % (options.workload, mtu))
78 |     plt.plot(x, y, label="TCP MTU %d" % (mtu),
79 |             color=tcp_color, linestyle=styles.pop())
80 | styles = [(0, (1, 1)), (0, (2, 2)), (0, (5, 2)), "solid"]  # fresh copy: the loop above emptied the list
81 | for mtu in mtus:
82 |     x, y = get_short_cdf("homa_%s_mtu%d" % (options.workload, mtu))
83 |     plt.plot(x, y, label="Homa MTU %d" % (mtu),
84 |             color=homa_color, linestyle=styles.pop())
85 | x, y = get_short_cdf(unloaded_exp)
86 | plt.plot(x, y, label="Homa best case", color=unloaded_color)
87 | 
88 | plt.legend(loc="upper right", prop={'size': 9})
89 | plt.savefig("%s/reports/mtu_cdfs_%s.pdf" % (options.log_dir, options.workload))
90 | 
91 | # print(plt.rcParams['axes.prop_cycle'].by_key()['color'])


--------------------------------------------------------------------------------
/util/cp_server_ports:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | # Copyright (c) 2020-2022 Homa Developers
  4 | # SPDX-License-Identifier: BSD-1-Clause
  5 | 
  6 | # This cperf benchmark measures the throughput of a single server as a
  7 | # function of the number of receiving ports
  8 | 
  9 | from cperf import *
 10 | 
 11 | parser = get_parser(description=
 12 |         'Measures throughput of a single server as a function of the number '
 13 |         ' of receiving ports.',
 14 |         usage='%(prog)s [options]',
 15 |         defaults={
 16 |             "server_ports":   6,
 17 |             "port_threads":   3})
 18 | options = parser.parse_args()
 19 | options.no_rtt_files = True
 20 | init(options)
 21 | if options.num_nodes < 2:
 22 |     print("--num_nodes too small (%d): must be at least 2"
 23 |             % (options.num_nodes))
 24 |     sys.exit(-1)
 25 | dir = "%s/reports" % (options.log_dir)
 26 | if not os.path.exists(dir):
 27 |     os.makedirs(dir)
 28 | 
 29 | options.server_nodes = 1
 30 | options.first_server = 0
 31 | options.client_ports = 5
 32 | options.tcp_client_ports = 5
 33 | options.port_receivers = 2
 34 | options.no_rtt_files = True
 35 | options.gbps = 0.0
 36 | # workloads = ["w1", "w2", "w3", "w4", "w5"]
 37 | workloads = ["w2", "w4"]
 38 | if options.protocol == "homa":
 39 |     port_range = range(1, (20//options.port_threads) + 1)
 40 | else:
 41 |     port_range = range(2, 21, 2)
 42 | 
 43 | print("port_range: %s" % (port_range))
 44 | 
 45 | # Run the experiments unless options.plot_only is set
 46 | if not options.plot_only:
 47 |     for ports in port_range:
 48 |         options.server_ports = ports
 49 |         options.tcp_server_ports = ports
 50 |         start_servers(range(0,1), options)  # (re)start the server on node 0 with this port count
 51 |         for workload in workloads:
 52 |             exp = "%s_%s_%d" % (options.protocol, workload, ports)  # same name is rebuilt when parsing logs
 53 |             options.workload = workload
 54 |             run_experiment(exp, range(1, options.num_nodes), options)  # clients on nodes 1..N-1
 55 |     log("Stopping nodes")
 56 |     stop_nodes()
 57 | 
 58 | # Parse the log files to extract useful data
 59 | experiments = {}
 60 | scan_log(options.log_dir + "/node0.log", "node0", experiments)
 61 | 
 62 | # Keys are workload names, values are lists of throughputs for each
 63 | # number of ports
 64 | tput = {}
 65 | 
 66 | for workload in workloads:
 67 |     tput[workload] = []
 68 |     for ports in port_range:
 69 |         exp = "%s_%s_%d" % (options.protocol, workload, ports)
 70 |         print("Experiment %s: %s" % (exp, experiments[exp]))
 71 |         node = experiments[exp]["node0"]
 72 |         readings = node["server_kops"]
 73 |         if len(readings) == 0:
 74 |             raise Error("No RPC throughput found for experiment %s"
 75 |                     % (exp))
 76 |         tput[workload].append(sum(readings)/len(readings))
 77 | 
 78 | fig, (ax1, ax2) = plt.subplots(2, figsize=[4, 5])
 79 | fig.suptitle("%s Single-Server Throughput" % (options.protocol.capitalize()),
 80 |         y=0.95)
 81 | plt.rcParams.update({'font.size': 10})
 82 | ax1.set_ylim(0, 2000)
 83 | ax2.set_ylim(0, 60)
 84 | for axis in [ax1, ax2]:
 85 |     axis.get_xaxis().set_tick_params(direction='in')
 86 |     axis.get_yaxis().set_tick_params(direction='in')
 87 |     axis.set_xlim(0, port_range[-1])
 88 |     top = axis.twiny()
 89 |     top.set_xlim(0, port_range[-1])
 90 |     top.set_xticklabels([])
 91 |     top.get_xaxis().set_tick_params(direction='in')
 92 | 
 93 |     axis.set_ylabel("Kops/second")
 94 |     right = axis.twinx()
 95 |     right.set_ylim(0, axis.get_ylim()[1])
 96 |     right.set_yticklabels([])
 97 |     right.get_yaxis().set_tick_params(direction='in')
 98 | ax1.grid(axis='y', which='major', linestyle='dotted')
 99 | ax2.set_xlabel("Receiving ports")
100 | ax2.grid(axis='y', which='major', linestyle='dotted')
101 | colors = ['#9467bd', '#d62728', '#2ca02c', '#ff7f0e', '#1f77b4']
102 | for workload in workloads:
103 |     if (workload == "w4") or (workload == "w5"):
104 |         ax2.plot(port_range, tput[workload], label=workload, color=colors.pop())
105 |     else:
106 |         ax1.plot(port_range, tput[workload], label=workload, color=colors.pop())
107 | 
108 | ax1.legend(loc="upper left", prop={'size': 9})
109 | ax2.legend(loc="upper left", prop={'size': 9})
110 | # plt.tight_layout()
111 | plt.savefig("%s/reports/%s_server_tput.pdf" % (options.log_dir, options.protocol))


--------------------------------------------------------------------------------
/util/cp_tcp:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2020-2022 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # This cperf benchmark measures the performance of TCP by itself, with
 7 | # no message truncation.
 8 | # Type "cp_tcp --help" for documentation.
 9 | 
10 | from cperf import *
11 | 
12 | parser = get_parser(description=
13 |         'Measures slowdown as a function of message size for TCP or DCTCP.',
14 |         usage='%(prog)s [options]')
15 | parser.add_argument('--dctcp', dest='dctcp', type=boolean,
16 |         default=False, help="Boolean value:: indicates whether measurements "
17 |                 "should be run on DCTCP (default: false)")
18 | options = parser.parse_args()
19 | # options.no_trunc = True
20 | init(options)
21 | servers = range(0, options.num_nodes)
22 | clients = range(0, options.num_nodes)
23 | 
24 | if options.workload != "":
25 |     load_info = [[options.workload, options.gbps]];
26 | 
27 | if options.dctcp:
28 |     prot = "dctcp"
29 |     label = "DCTCP"
30 |     color = dctcp_color
31 | else:
32 |     prot = "tcp"
33 |     label = "DCTCP"
34 |     color = tcp_color
35 | 
36 | # First, run all of the experiments
37 | if not options.plot_only:
38 |     congestion = get_sysctl_parameter("net.ipv4.tcp_congestion_control")
39 |     if options.dctcp:
40 |         set_sysctl_parameter("net.ipv4.tcp_congestion_control",
41 |                         "dctcp", range(0, options.num_nodes))
42 |     else:
43 |         set_sysctl_parameter("net.ipv4.tcp_congestion_control",
44 |                     "cubic", range(0, options.num_nodes))
45 |     for workload, bw in load_info:
46 |         options.workload = workload
47 |         options.gbps = bw/2.0
48 |         unloaded_exp = "unloaded_" + workload;
49 |         exp = "%s_%s" % (prot, workload);
50 |         try:
51 |             options.protocol = "tcp"
52 |             start_servers(servers, options)
53 | 
54 |             o = copy.deepcopy(options);
55 |             o.tcp_client_ports = 1
56 |             o.client_max = 1
57 |             o.tcp_server_ports = 1
58 |             o.server_nodes = 1
59 |             o.first_server = 1
60 |             o.unloaded = 500
61 |             run_experiment(unloaded_exp, range(0, 1), o)
62 |             run_experiment(exp, clients, options)
63 |             do_cmd("tt print cp.tt", clients)
64 |         except Exception as e:
65 |             log(traceback.format_exc())
66 | 
67 |     set_sysctl_parameter("net.ipv4.tcp_congestion_control", congestion,
68 |             range(0, options.num_nodes))
69 |     log("Stopping nodes")
70 |     stop_nodes()
71 |     scan_logs()
72 | 
73 | # Generate plots and reports
74 | for workload, bw in load_info:
75 |     unloaded_exp = "unloaded_" + workload;
76 |     exp = "%s_%s" % (prot, workload);
77 | 
78 |     set_unloaded(unloaded_exp)
79 | 
80 |     # Generate slowdown plot.
81 |     log("Generating slowdown plot for %s" % (workload))
82 |     title = "%s %d nodes, %.1f Gbps" % (workload.capitalize(),
83 |             options.num_nodes, bw)
84 |     ax = start_slowdown_plot(title, 1000, exp)
85 |     plot_slowdown(ax, exp, "p99", "%s P99" % (prot))
86 |     plot_slowdown(ax, exp, "p50", "%s P50" % (prot))
87 |     ax.legend()
88 |     plt.tight_layout()
89 |     plt.savefig("%s/reports/%s_%s.pdf" % (options.log_dir, prot, workload))
90 | 
91 |     # Generate CDF of small-message RTTs.
92 |     log("Generating short message CDF for %s" % (workload))
93 |     x, y = get_short_cdf(exp)
94 |     start_cdf_plot(title, 10, 0.99e05, 1e-05, "RTT (usecs)",
95 |             "Cumulative Fraction Short Messages")
96 |     plt.plot(x, y, label=label, color=color)
97 |     plt.legend(loc="upper right", prop={'size': 9})
98 |     plt.savefig("%s/reports/short_cdf_%s.pdf" % (options.log_dir, workload))
99 | 


--------------------------------------------------------------------------------
/util/cp_vs_tcp:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | # Copyright (c) 2020-2023 Homa Developers
  4 | # SPDX-License-Identifier: BSD-1-Clause
  5 | 
  6 | # This cperf benchmark compares the performance of Homa with TCP.
  7 | # Type "cp_vs_tcp --help" for documentation.
  8 | 
  9 | from cperf import *
 10 | 
 11 | # Workloads, bandwidths, and running times to use by default.
 12 | load_info = [["w2", 3.2, 5], ["w3", 14, 10], ["w4", 20, 20], ["w5", 20, 30]]
 13 | 
 14 | parser = get_parser(description=
 15 |         'Measures slowdown as a function of message size for Homa and TCP.',
 16 |         usage='%(prog)s [options]')
 17 | parser.add_argument('--tcp', dest='tcp', type=boolean,
 18 |         default=True, help="Boolean value: indicates whether measurements "
 19 |                 "should be run on TCP (default: true)")
 20 | parser.add_argument('--dctcp', dest='dctcp', type=boolean,
 21 |         default=False, help="Boolean value:: indicates whether measurements "
 22 |                 "should be run on DCTCP (default: false)")
 23 | parser.add_argument('--servers', dest='num_servers', type=int, metavar='count',
 24 |         default=0, help="If nonzero, clients and servers will be segregated "
 25 |         "and this value indicates the number of server nodes; all other "
 26 |         "nodes will be clients. If 0, each node runs both a client and a "
 27 |         "server (default: 0)")
 28 | parser.add_argument('--skip-unloaded', dest='skip_unloaded', type=boolean,
 29 |         default=True, help="Boolean value:: true means don't measure"
 30 | 	    "Homa latency under low load (default: true)")
 31 | options = parser.parse_args()
 32 | init(options)
 33 | 
 34 | bw_multiplier = 0.5
 35 | if options.num_servers > 0:
 36 |     if options.num_servers >= options.num_nodes:
 37 |         raise Error("Illegal value %d for --servers option; must be less "
 38 |                 "than --nodes (%d)" % (options.num_servers,
 39 |                 options.num_nodes))
 40 |     options.servers = options.nodes[0:options.num_servers]
 41 |     options.clients = options.nodes[options.num_servers:len(options.nodes)]
 42 |     options.server_ports = options.server_ports * 2
 43 |     options.client_ports = options.client_ports * 2
 44 |     options.tcp_server_ports = options.tcp_server_ports * 2
 45 |     options.tcp_client_ports = options.tcp_client_ports * 2
 46 |     bw_multiplier = min(len(options.servers), len(options.clients)) \
 47 |             / len(options.clients)
 48 | 
 49 | if options.workload != "":
 50 |     load_info = [[options.workload, options.gbps, options.seconds]]
 51 | 
 52 | # First, run all of the experiments
 53 | if not options.plot_only:
 54 |     congestion = get_sysctl_parameter("net.ipv4.tcp_congestion_control")
 55 |     for workload, bw, seconds in load_info:
 56 |         options.workload = workload
 57 |         options.gbps = bw * bw_multiplier
 58 |         options.seconds = seconds
 59 |         unloaded_exp = "unloaded_" + workload
 60 |         homa_exp = "homa_" + workload
 61 |         tcp_exp = "tcp_" + workload
 62 |         dctcp_exp = "dctcp_" + workload
 63 |         try:
 64 |             options.protocol = "homa"
 65 | 
 66 |             if not options.skip_unloaded:
 67 |                 start_servers(unloaded_exp, options.nodes[1:2], options)
 68 |                 o = copy.deepcopy(options)
 69 |                 o.gbps = 0.0
 70 |                 o.client_ports = 1
 71 |                 o.client_max = 1
 72 |                 o.server_ports = 1
 73 |                 o.unloaded = 500
 74 |                 run_experiment(unloaded_exp, options.nodes[0:1], o)
 75 | 
 76 |             start_servers(homa_exp, options.servers, options)
 77 |             run_experiment(homa_exp, options.clients, options)
 78 | 
 79 |             if options.tcp:
 80 |                 options.protocol = "tcp"
 81 |                 set_sysctl_parameter("net.ipv4.tcp_congestion_control",
 82 |                         "cubic", range(0, options.num_nodes))
 83 |                 start_servers(tcp_exp, options.servers, options)
 84 |                 run_experiment(tcp_exp, options.clients, options)
 85 | 
 86 |             if options.dctcp:
 87 |                 options.protocol = "tcp"
 88 |                 set_sysctl_parameter("net.ipv4.tcp_congestion_control",
 89 |                         "dctcp", range(0, options.num_nodes))
 90 |                 start_servers(tcp_exp, options.servers, options)
 91 |                 run_experiment(dctcp_exp, options.clients, options)
 92 |         except Exception as e:
 93 |             log(traceback.format_exc())
 94 | 
 95 |     if options.tcp or options.dctcp:
 96 |         print("Resetting TCP congestion control to %s" % (congestion))
 97 |         set_sysctl_parameter("net.ipv4.tcp_congestion_control", congestion,
 98 |                 range(0, options.num_nodes))
 99 |     log("Stopping nodes")
100 |     stop_nodes()
101 |     scan_logs()
102 | 
103 | # Generate plots and reports
104 | for workload, bw, seconds in load_info:
105 |     unloaded_exp = "unloaded_" + workload
106 |     homa_exp = "homa_" + workload
107 |     tcp_exp = "tcp_" + workload
108 |     dctcp_exp = "dctcp_" + workload
109 |     scan_metrics(homa_exp)
110 | 
111 |     if not options.skip_unloaded:
112 |         set_unloaded(unloaded_exp)
113 | 
114 |     # Generate slowdown plot.
115 |     log("Generating slowdown plot for %s" % (workload))
116 |     title = "%s %d nodes, %.1f Gbps" % (workload.capitalize(),
117 |             options.num_nodes, bw)
118 |     ax = start_slowdown_plot(title, 1000, homa_exp)
119 |     if options.tcp:
120 |         plot_slowdown(ax, tcp_exp, "p99", "TCP P99", color=tcp_color)
121 |         plot_slowdown(ax, tcp_exp, "p50", "TCP P50", color=tcp_color2)
122 |     if options.dctcp:
123 |         plot_slowdown(ax, dctcp_exp, "p99", "DCTCP P99", color=dctcp_color)
124 |         plot_slowdown(ax, dctcp_exp, "p50", "DCTCP P50", color=dctcp_color2)
125 |     plot_slowdown(ax, homa_exp, "p99", "Homa P99", color=homa_color)
126 |     plot_slowdown(ax, homa_exp, "p50", "Homa P50", color=homa_color2)
127 |     ax.legend(loc="upper right", prop={'size': 9})
128 |     plt.tight_layout()
129 |     plt.savefig("%s/reports/vs_tcp_%s.pdf" % (options.log_dir, workload))
130 | 
131 |     # Generate CDF of small-message RTTs.
132 |     log("Generating short message CDF for %s" % (workload))
133 |     if not options.skip_unloaded:
134 |         unloaded_x, unloaded_y = get_short_cdf(unloaded_exp)
135 |     homa_x, homa_y = get_short_cdf(homa_exp)
136 |     if options.tcp:
137 |         tcp_x, tcp_y = get_short_cdf(tcp_exp)
138 |     if options.dctcp:
139 |         dctcp_x, dctcp_y = get_short_cdf(dctcp_exp)
140 |     start_cdf_plot(title, 10, 0.99e05, 1e-05, "RTT (usecs)",
141 |             "Cumulative Fraction Short Messages")
142 |     if options.tcp:
143 |         plt.plot(tcp_x, tcp_y, label="TCP", color=tcp_color)
144 |     if options.dctcp:
145 |         plt.plot(dctcp_x, dctcp_y, label="DCTCP", color=dctcp_color)
146 |     plt.plot(homa_x, homa_y, label="Homa", color=homa_color)
147 |     if not options.skip_unloaded:
148 |         plt.plot(unloaded_x, unloaded_y, label="Homa best case",
149 |                  color=unloaded_color)
150 |     plt.legend(loc="upper right", prop={'size': 9})
151 |     plt.savefig("%s/reports/short_cdf_%s.pdf" % (options.log_dir, workload))
152 | 


--------------------------------------------------------------------------------
/util/diff_metrics.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2018-2022 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | """
 7 | This program reads 2 Homa metrics files (/proc/net/homa_metrics)
 8 | and prints out all of the statistics that have changed, in the
 9 | same format as the original files.
10 | 
11 | Usage:
12 | diff_metrics file1 file2
13 | """
14 | 
15 | from __future__ import division, print_function
16 | from glob import glob
17 | from optparse import OptionParser
18 | import math
19 | import os
20 | import re
21 | import string
22 | import sys
23 | 
24 | # Contains values for all the metrics from the first file. Keys are
25 | # metric names, values are metric values.
26 | metrics = {}
27 | 
28 | def scan_first(name):
29 |     """
30 |     Scan the metrics file given by 'name' and record its metrics.
31 |     """
32 |     global metrics
33 |     f = open(name)
34 | 
35 |     for line in f:
36 |         match = re.match('^([^ ]+) *([0-9]+) *(.*)', line)
37 |         if not match:
38 |             print("Didn't match: %s\n" % (line))
39 |             continue
40 |         metrics[match.group(1)] = long(match.group(2))
41 |     f.close()
42 | 
43 | def scan_second(name):
44 |     """
45 |     Scan the metrics file given by 'name', compare its metrics to
46 |     those that have been recorded, and print an output line with
47 |     the difference, if there is any.
48 |     """
49 |     global metrics
50 |     f = open(name)
51 | 
52 |     for line in f:
53 |         match = re.match('^([^ ]+) *([0-9]+) *(.*)', line)
54 |         if not match:
55 |             print("Didn't match: %s\n" % (line))
56 |             continue
57 |         name = match.group(1)
58 |         value = long(match.group(2))
59 |         comment = match.group(3)
60 |         if not name in metrics:
61 |             print("No metric for %s\n" % (name))
62 |             continue
63 |         # print("%s: %d %d\n" % (name, metrics[name], value))
64 |         diff = value - metrics[name]
65 |         if diff == 0:
66 |             continue
67 |         print("%-22s %15lu  %s" % (name, diff, comment))
68 |     f.close()
69 | 
70 | if len(sys.argv) != 3:
71 |     printf("Usage: %s file file2\n" % sys.argv[0])
72 |     exit(1)
73 | 
74 | scan_first(sys.argv[1])
75 | scan_second(sys.argv[2])


--------------------------------------------------------------------------------
/util/diff_rtts.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | # Copyright (c) 2023 Homa Developers
  4 | # SPDX-License-Identifier: BSD-1-Clause
  5 | 
  6 | """
  7 | Compare two .rtts files to identify differences between them.
  8 | 
  9 | Usage: diff_rtts.py file1 file2
 10 | """
 11 | 
 12 | from __future__ import division, print_function
 13 | from glob import glob
 14 | from operator import itemgetter
 15 | from optparse import OptionParser
 16 | import math
 17 | import os
 18 | import re
 19 | import string
 20 | import sys
 21 | 
 22 | def read_rtts(file):
 23 |     """
 24 |     Read a .rtts file and returns a list of (length, slowdown) pairs.
 25 | 
 26 |     file:  Name of file to read
 27 |     """
 28 | 
 29 |     slowdowns = []
 30 |     f = open(file)
 31 |     for line in f:
 32 |         if line.startswith('#') or not line:
 33 |             continue
 34 |         match = re.match(' *([0-9]+) +([0-9.]+)', line)
 35 |         if not match:
 36 |             raise Exception("Malformed line in .rtts file: %s" % (line.rstrip()))
 37 |         length = int(match.group(1))
 38 |         rtt = float(match.group(2))
 39 | 
 40 |         # Optimal time (usecs) assumes 13 usec minimum, 25 Gbps network
 41 |         optimal = 13.0 + length*8/25000.0
 42 |         slowdown = rtt/optimal
 43 |         slowdowns.append([length, slowdown])
 44 |     f.close()
 45 |     return slowdowns
 46 | 
 47 | def avg_slowdown(slowdowns):
 48 |     """
 49 |     Return average slowdown from a list of (length, slowdown) pairs.
 50 | 
 51 |     slowdowns:  Input list
 52 |     """
 53 |     sum = 0.0
 54 |     for item in slowdowns:
 55 |         sum += item[1]
 56 |     return sum/len(slowdowns)
 57 | 
 58 | def deciles(slowdowns):
 59 |     """
 60 |     Given a list of (length, slowdown) pairs, divide into 10 groups by
 61 |     length, then returns 6 lists (each with one entry per decile),
 62 |     containing:
 63 |         * largest length in the decile
 64 |         * P50 slowdown for the decile
 65 |         * P90 slowdown for the decile
 66 |         * P99 slowdown for the decile
 67 |         * P99.9 slowdown for the decile
 68 |         * max slowdown for the decile
 69 |     """
 70 |     p50 = []
 71 |     p90 = []
 72 |     p99 = []
 73 |     p999 = []
 74 |     max = []
 75 |     cutoffs = []
 76 |     s = sorted(slowdowns, key = itemgetter(0))
 77 |     for split in range(1, 11):
 78 |         split_start = len(s)*(split-1)//10
 79 |         split_end = len(s)*split//10
 80 |         decile = []
 81 |         for i in range(split_start, split_end):
 82 |             decile.append(s[i][1])
 83 |         cutoffs.append(s[split_end-1][0])
 84 |         decile = sorted(decile)
 85 |         p50.append(decile[len(decile)//2])
 86 |         p90.append(decile[len(decile)*9//10])
 87 |         p99.append(decile[len(decile)*99//100])
 88 |         p999.append(decile[len(decile)*999//1000])
 89 |         max.append(decile[-1])
 90 |     return cutoffs, p50, p90, p99, p999, max
 91 | 
 92 | 
 93 | if len(sys.argv) != 3:
 94 |     print("Usage: diff_rtts.py file1 file2")
 95 |     exit(1)
 96 | f1 = sys.argv[1]
 97 | f2 = sys.argv[2]
 98 | 
 99 | s1 = read_rtts(f1)
100 | print("Average slowdown in %s: %.1f" % (f1, avg_slowdown(s1)))
101 | 
102 | s2 = read_rtts(sys.argv[2])
103 | print("Average slowdown in %s: %.1f" % (f2, avg_slowdown(s2)))
104 | print("")
105 | 
106 | c1, p50_1, p90_1, p99_1, p999_1, max_1 = deciles(s1)
107 | c2, p50_2, p90_2, p99_2, p999_2, max_2 = deciles(s2)
108 | 
109 | out = ""
110 | for cutoff in c1:
111 |     out += " %d" % (cutoff)
112 | print("Cutoffs for %s:%s" % (f1, out))
113 | out = ""
114 | for cutoff in c2:
115 |     out += " %d" % (cutoff)
116 | print("Cutoffs for %s:%s" % (f2, out))
117 | print("")
118 | 
119 | out = ""
120 | for val in p50_1:
121 |     out += " %5.1f" % (val)
122 | print("P50s for %s:%s" % (f1, out))
123 | out = ""
124 | for val in p50_2:
125 |     out += " %5.1f" % (val)
126 | print("P50s for %s:%s" % (f2, out))
127 | print("")
128 | 
129 | out = ""
130 | for val in p90_1:
131 |     out += " %5.1f" % (val)
132 | print("P90s for %s:%s" % (f1, out))
133 | out = ""
134 | for val in p90_2:
135 |     out += " %5.1f" % (val)
136 | print("P90s for %s:%s" % (f2, out))
137 | print("")
138 | 
139 | out = ""
140 | for val in p99_1:
141 |     out += " %5.1f" % (val)
142 | print("P99s for %s:%s" % (f1, out))
143 | out = ""
144 | for val in p99_2:
145 |     out += " %5.1f" % (val)
146 | print("P99s for %s:%s" % (f2, out))
147 | print("")
148 | 
149 | out = ""
150 | for val in p999_1:
151 |     out += " %5.1f" % (val)
152 | print("P99.9s for %s:%s" % (f1, out))
153 | out = ""
154 | for val in p999_2:
155 |     out += " %5.1f" % (val)
156 | print("P99.9s for %s:%s" % (f2, out))
157 | print("")
158 | 
159 | out = ""
160 | for val in max_1:
161 |     out += " %5.1f" % (val)
162 | print("Maxes for %s:%s" % (f1, out))
163 | out = ""
164 | for val in max_2:
165 |     out += " %5.1f" % (val)
166 | print("Maxes for %s:%s" % (f2, out))
167 | 
168 | exit(0)


--------------------------------------------------------------------------------
/util/dist.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2023 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /* This file contains information and supporting declarations for
 6 |  * the workload distributions from the Homa paper.
 7 |  */
 8 | 
 9 | #ifndef _DIST_H
10 | #define _DIST_H
11 | 
12 | #include <random>
13 | #include <vector>
14 | 
15 | /**
16 |  * class dist_point_gen - Represents a CDF of message lengths and generates
17 |  * randomized lengths according to that CDF.
18 |  */
19 | class dist_point_gen {
20 | 	public:
21 | 	dist_point_gen(const char* workload, size_t max_size,
22 | 		double min_bucket_frac = .0025, double max_size_ratio = 1.2);
23 | 	int operator()(std::mt19937 &rand_gen);
24 | 	double get_mean() const {return dist_mean;}
25 | 	double dist_overhead(int mtu) const;
26 | 	std::vector<int> values() const;
27 | 	std::vector<double> cdf_fractions() const;
28 | 
29 | 	/**
30 | 	 * struct cdf_point - Describes one point in a CDF of message lengths.
31 | 	 */
32 | 	struct cdf_point {
33 | 		/** @length: message length, in bytes. */
34 | 		size_t length;
35 | 
36 | 		/**
37 | 		 * @fraction: fraction of all messages that are this size
38 | 		 * or smaller.
39 | 		 */
40 | 		double fraction;
41 | 
42 | 		cdf_point(size_t length, double fraction)
43 | 			: length(length), fraction(fraction)
44 | 		{}
45 | 	};
46 | 
47 | 	private:
48 | 	/**
49 | 	 * @dist_points: collection of individual data points that
50 | 	 * make up this CDF (in increasing order of length).
51 | 	 */
52 | 	std::vector<cdf_point> dist_points;
53 | 
54 | 	/**
55 | 	 * @dist_mean: the average value of this distribution.
56 | 	 */
57 | 	double dist_mean;
58 | 
59 | 	/** @uniform_dist: used to generate values in the range [0, 1). */
60 | 	std::uniform_real_distribution<double> uniform_dist;
61 | 
62 | 	static int dist_msg_overhead(int length, int mtu);
63 | };
64 | #endif /* _DIST_H */
65 | 


--------------------------------------------------------------------------------
/util/dist_test.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2023 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | 
 7 | #include <algorithm>
 8 | #include <chrono>
 9 | #include <iostream>
10 | #include <map>
11 | 
12 | #include "dist.h"
13 | #include "test_utils.h"
14 | 
15 | /**
16 |  * define HOMA_MAX_MESSAGE_LENGTH - Maximum bytes of payload in a Homa
17 |  * request or response message.
18 |  */
19 | #define HOMA_MAX_MESSAGE_LENGTH 1000000
20 | 
21 | /** @rand_gen: random number generator. */
22 | static std::mt19937 rand_gen(
23 | 		std::chrono::system_clock::now().time_since_epoch().count());
24 | 
25 | /* This file tests the dist.cc/dist.h files and dist_point_gen class. It will
26 |  * print the CDF for every generated length, a histogram to show how often each
27 |  * length was generated, the sizes of the given distribution, and finally the
28 |  * mean, range, and overhead of the distribution requested.
29 |  *
30 |  * Produced by:
31 |  * ./dist_test workload [number of points] [max message length]
32 |  *
33 |  * @workload: - the distribution requested for the test. Can be workload 1-5
34 |  * or a fixed distribution.
35 |  *
36 |  * @number_of_points: - the number of points that the dist_point_gen will
37 |  * randomly generate for the test. (Default = 10).
38 |  *
39 |  * @max_message_length: - the maximum size of a message.
40 |  */
41 | int main (int argc, char**argv)
42 | {
43 | 	int max_message_length = HOMA_MAX_MESSAGE_LENGTH;
44 | 	size_t num_points = 10;
45 | 	if (argc < 2) {
46 | 		fprintf(stderr, "Usage: %s workload [# points] "
47 | 				"[max_message_length]\n", argv[0]);
48 | 		return 1;	// argv[1] is required below
49 | 	}
50 | 	if (argc > 3) {
51 | 		max_message_length = atoi(argv[3]);
52 | 	}
53 | 	if (argc > 2) {
54 | 		num_points = std::max(1, atoi(argv[2]));  // need >= 1 sample
55 | 	}
56 | 	dist_point_gen generator(argv[1], max_message_length);
57 | 	std::map<int, int> hist;
58 | 	std::map<int, float> cdf;
59 | 	// Time 1,000,000 samples to estimate the generator's per-sample cost.
60 | 	uint64_t start = rdtsc();
61 | 	for (size_t i = 0; i < 1'000'000; i++) {
62 | 		generator(rand_gen);
63 | 	}
64 | 	uint64_t end = rdtsc();
65 | 	double avg_ns = double(end-start)/(get_cycles_per_sec()*1e-09)/1'000'000;
66 | 	// Draw the samples that feed the histogram and CDF printed below.
67 | 	for (size_t i = 0; i < num_points; i++) {
68 | 		hist[generator(rand_gen)]++;
69 | 	}
70 | 
71 | 	int count = 0;
72 | 	// A running total over the sorted histogram gives cumulative counts.
73 | 	for (const auto [key, val] : hist) {
74 | 		count += val;
75 | 		cdf[key] = count;
76 | 	}
77 | 
78 | 	printf("\nCDF:\n");
79 | 	for (const auto [key, val] : cdf) {
80 | 		printf("%7d %6.4f\n", key, val/num_points);
81 | 	}
82 | 
83 | 	printf("\nHistogram:\n");
84 | 	for (const auto [key, val] : hist) {
85 | 		printf("%d %d\n", key, val);
86 | 	}
87 | 
88 | 	std::vector<int> sizes = generator.values();
89 | 	printf("\nSizes:\n");
90 | 	for (const int num : sizes) {
91 | 		printf("%d\n", num);
92 | 	}
93 | 
94 | 	printf("\nMean: %.1f\n", generator.get_mean());
95 | 	printf("Range: min %d, max %d\n", hist.begin()->first, hist.rbegin()->first);
96 | 	printf("Overhead (1500B packets): %.3f\n", generator.dist_overhead(1500));
97 | 	printf("Average time/sample for generator: %.1f ns\n", avg_ns);
98 | }


--------------------------------------------------------------------------------
/util/dist_to_proto.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2023 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | #include "dist.h"
 6 | extern "C" {
 7 | #include "homa.h"
 8 | }
 9 | #include "iostream"
10 | 
11 | /**
12 |  * This program takes one of the five workload distributions and converts
13 |  * it into a fragment of a textformat protobuf used in distbench. It will first
14 |  * merge buckets and truncate cdf_point sizes according to command line
15 |  * arguments then write the cdf_points to stdout and the interval conversion
16 |  * to stderr.
17 |  *
18 |  * Usage:
19 |  * ./dist_to_proto workload [max message length] [min bucket frac]
20 |  *                          [max size ratio] [gigabits per second]
21 |  */
22 | int main (int argc, char**argv)
23 | {
24 | 	int max_message_length = HOMA_MAX_MESSAGE_LENGTH;
25 | 	double min_bucket_frac = 0.0025;
26 | 	double max_size_ratio = 1.2;
27 | 	double gbps = 20.0;
28 | 	if (argc < 2) {
29 | 		fprintf(stderr, "Usage: %s workload [max message length] "
30 | 				"[min bucket frac] [max size ratio] [gbps]\n",
31 | 				argv[0]);
32 | 		exit(1);
33 | 	}
34 | 	if (argc > 2) {
35 | 		max_message_length = atoi(argv[2]);
36 | 	}
37 | 	if (argc > 3) {
38 | 		min_bucket_frac = std::stod(argv[3]);
39 | 	}
40 | 	if (argc > 4) {
41 | 		max_size_ratio = std::stod(argv[4]);
42 | 	}
43 | 	if (argc > 5) {
44 | 		gbps = std::stod(argv[5]);
45 | 	}
46 | 
47 | 	dist_point_gen generator(argv[1], max_message_length,
48 | 			min_bucket_frac, max_size_ratio);
49 | 	std::vector<int> values = generator.values();
50 | 	std::vector<double> fractions = generator.cdf_fractions();
51 | 
52 | 	for (size_t i = 0; i < values.size(); ++i) {
53 | 		printf("    cdf_points { value: %d, cdf: %20.19f }\n",
54 | 				values[i], fractions[i]);
55 | 	}
56 | 
57 | 	/**
58 | 	 * Convert average size to bits, then divide by gbps and round up to get
59 | 	 * nanoseconds, then multiply by 2 because request size and response
60 | 	 * size are equal
61 | 	 */
62 | 	double interval_ns = (std::ceil( (generator.get_mean() * 8.0) / gbps))
63 | 			* 2;
64 | 	fprintf(stderr,"%.0f", interval_ns);
65 | }


--------------------------------------------------------------------------------
/util/get_time_trace.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2022 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /**
 6 |  * This program will read timetrace information from the kernel and
 7 |  * dump it on stdout. Invoke with no parameters.
 8 |  */
 9 | 
10 | #include <errno.h>
11 | #include <stdio.h>
12 | #include <stdlib.h>
13 | #include <string.h>
14 | #include <unistd.h>
15 | #include <sys/time.h>
16 | #include <stdint.h>
17 | 
18 | #include "test_utils.h"
19 | 
20 | #define BUF_SIZE 10000000
21 | char buffer[BUF_SIZE];
22 | 
23 | int main(int argc, char** argv) {
24 | 	// Fetch the time trace data from the kernel.
25 | 	int length = syscall(333, buffer, BUF_SIZE);
26 | 	if (length < 0) {
27 | 		printf("Error in get_timetrace: %s (%d)\n",
28 | 				strerror(errno), errno);
29 | 		return 1;
30 | 	}
31 | 	printf("Kernel returned timetrace with %d bytes\n", length);
32 | 	if (length == BUF_SIZE) {
33 | 		printf("Not enough space in buffer for complete timetrace.\n");
34 | 	}
35 | 	// Terminate the text; an empty trace must not write buffer[-1].
36 | 	buffer[length > 0 ? length-1 : 0] = 0;
37 | 	double cps = get_cycles_per_sec();
38 | 	printf("Cycles per second: %g\n", cps);
39 | 
40 | 	// Scan through the records in the buffer. For each record, replace
41 | 	// the timestamp with more detailed information in ns, and output
42 | 	// the modified record.
43 | 	char* current = buffer;
44 | 	uint64_t start_time = 0;
45 | 	uint64_t prev_time = 0;
46 | 	while (1) {
47 | 		char *stamp_end;
48 | 		double ns, delta_ns;
49 | 		// A stamp of 0 (no number parsed) ends the scan.
50 | 		uint64_t stamp = strtoull(current, &stamp_end, 10);
51 | 		if (stamp == 0) {
52 | 			break;
53 | 		}
54 | 		if (start_time == 0) {
55 | 			start_time = stamp;
56 | 			prev_time = stamp;
57 | 		}
58 | 		ns = (1e09 * (double)(stamp - start_time)) / cps;
59 | 		delta_ns = (1e09 * (double)(stamp - prev_time)) / cps;
60 | 		printf("%8.1f ns (+%6.1f ns):", ns, delta_ns);
61 | 
62 | 		for (current = stamp_end;
63 | 				(*current != 0) && (*current != '\n');
64 | 				current++) {
65 | 			putc(*current, stdout);
66 | 		}
67 | 		putc('\n', stdout);
68 | 		prev_time = stamp;
69 | 	}
70 | 	return 0;
71 | }
72 | 
73 | 


--------------------------------------------------------------------------------
/util/get_traces:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (c) 2023 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # Usage:
 7 | # get_traces first last dst
 8 | #
 9 | # This script will retrieve node.tt timetraces from the home directory
10 | # of the nodes with numbers from first to last, inclusive, and store them
11 | # in files nodeN.tt in directory dst.
12 | 
# Exactly three arguments are required: first node, last node, destination.
if [ $# -ne 3 ]; then
    echo "Usage: get_traces first last dst"
    exit 1
fi
first=$1
last=$2
dst=$3

# The destination directory is the same for every node, so create it once
# and give up right away if that is not possible.
mkdir -p "$dst" || exit 1

# Copy node.tt from each node into $dst/nodeN.tt. All expansions are quoted
# so that a destination containing spaces or glob characters works.
for ((i = $first ; i <= $last; i++)); do
    node="node$i"
    echo "$node"
    cl ssh "$node" cat node.tt > "$dst/$node.tt"
done
27 | 


--------------------------------------------------------------------------------
/util/inc_tput.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2024 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /* This program measures the throughput of atomic increments in the face
 6 |  * of many concurrent cores invoking it.
 7 |  */
 8 | 
 9 | #include <stdio.h>
10 | 
11 | #include <stdlib.h>
12 | #include <sys/time.h>
13 | #include <unistd.h>
14 | 
15 | #include <atomic>
16 | #include <thread>
17 | #include <vector>
18 | 
/* Shared counter that every worker thread increments atomically.
 * Direct-initialized: copy-initialization ("= 0") of a std::atomic does
 * not compile before C++17 because the copy constructor is deleted.
 */
std::atomic_int value{0};

/* thread_counts[i] counts the increments performed by the worker thread
 * that was started with index i.
 */
std::vector<int> thread_counts;
21 | 
/**
 * rdtsc(): read the fine-grain CPU cycle counter with the RDTSC
 * instruction and return it as a single 64-bit value.
 */
inline static uint64_t rdtsc(void)
{
	uint32_t low_bits, high_bits;

	/* The instruction delivers the counter as two 32-bit halves. */
	__asm__ __volatile__("rdtsc" : "=a" (low_bits), "=d" (high_bits));

	uint64_t cycles = high_bits;
	cycles <<= 32;
	cycles |= low_bits;
	return cycles;
}
32 | 
33 | void increment(int index)
34 | {
35 | 	while (1) {
36 | 		value.fetch_add(1);
37 | 		thread_counts[index]++;
38 | 	}
39 | }
40 | 
41 | int main(int argc, char** argv)
42 | {
43 | 	int num_threads = 1;
44 | 	int i;
45 | 	std::vector<int> old_counts;
46 | 
47 | 	if (argc == 2) {
48 | 		char *end;
49 | 		num_threads = strtol(argv[1], &end, 0);
50 | 		if (*end != 0) {
51 | 			printf("Illegal argument %s: must be integer\n",
52 | 					argv[1]);
53 | 			exit(1);
54 | 		}
55 | 	} else if (argc != 1) {
56 | 		printf("Usage: %s [num_threads]\n", argv[0]);
57 | 	}
58 | 
59 | 	for (i = 0; i < num_threads; i++) {
60 | 		thread_counts.emplace_back(0);
61 | 		old_counts.emplace_back(0);
62 | 		new std::thread(increment, i);
63 | 	}
64 | 
65 | 	struct timeval prev_time, cur_time;
66 | 	gettimeofday(&prev_time, nullptr);
67 | 	uint64_t old_value = value;
68 | 	while (1) {
69 | 		sleep(1);
70 | 		gettimeofday(&cur_time, nullptr);
71 | 		uint64_t new_value = value;
72 | 		double diff = new_value - old_value;
73 | 		double secs = cur_time.tv_sec - prev_time.tv_sec;
74 | 		secs += 1e-6*(cur_time.tv_usec - prev_time.tv_usec);
75 | 		printf("%.2f Mops/sec [", (diff/secs)*1e-6);
76 | 		const char *sep = "";
77 | 		for (i = 0; i < num_threads; i++) {
78 | 			int new_count = thread_counts[i];
79 | 			diff = new_count - old_counts[i];
80 | 			printf("%s%.2f", sep, (diff/secs)*1e-6);
81 | 			sep = " ";
82 | 			old_counts[i] = new_count;
83 | 		}
84 | 		printf("]\n");
85 | 		prev_time = cur_time;
86 | 		old_value = new_value;
87 | 	}
88 | }


--------------------------------------------------------------------------------
/util/plot.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | # Copyright (c) 2023 Homa Developers
  4 | # SPDX-License-Identifier: BSD-1-Clause
  5 | 
  6 | # This file provides a library of functions for generating plots.
  7 | 
  8 | import matplotlib
  9 | import matplotlib.pyplot as plt
 10 | import os
 11 | from pathlib import Path
 12 | import re
 13 | import string
 14 | import sys
 15 | 
 16 | from cperf import *
 17 | 
 18 | # Standard colors for plotting
 19 | color_green =      '#00B000'
 20 | color_blue =       '#1759BB'
 21 | color_red =        '#d62728'
 22 | tcp_color =      '#00B000'
 23 | tcp_color2 =     '#5BD15B'
 24 | tcp_color3 =     '#96E296'
 25 | homa_color =     '#1759BB'
 26 | homa_color2 =    '#6099EE'
 27 | homa_color3 =    '#A6C6F6'
 28 | dctcp_color =    '#7A4412'
 29 | dctcp_color2 =   '#CB701D'
 30 | dctcp_color3 =   '#EAA668'
 31 | unloaded_color = '#d62728'
 32 | 
 33 | matplotlib.rc('mathtext', default='regular')
 34 | 
 35 | # Dictionary containing all data that has been read from files so far.
 36 | # Keys are file names, values are dictionaries of columns for that file,
 37 | # in which keys are column names and values are lists of the values
 38 | # in that column.
 39 | file_data = {}
 40 | 
 41 | def __read_file(file):
 42 |     """
 43 |     Read a file and add its contents to the file_data variable. If the
 44 |     file has already been read, then this function does nothing.
 45 | 
 46 |     file:   Path name of the file to read. Lines starting with '#' are
 47 |             considered comments and ignored, as are blank lines. Of the
 48 |             non-blank non-comment lines, the first contains space-separated
 49 |             column names, and the others contain data for those columns.
 50 |     """
 51 |     global file_data
 52 | 
 53 |     if file in file_data:
 54 |         return
 55 |     columns = {}
 56 |     names = None
 57 |     f = open(file)
 58 |     for line in f:
 59 |         fields = line.strip().split()
 60 |         if len(fields) == 0:
 61 |             continue
 62 |         if fields[0].startswith('#'):
 63 |             continue
 64 |         if not names:
 65 |             names = fields
 66 |             for n in names:
 67 |                 if n in columns:
 68 |                     print('Duplicate column name %s in %s' % (file, n),
 69 |                             file=sys.stderr())
 70 |                 columns[n] = []
 71 |         else:
 72 |             if len(fields) != len(names):
 73 |                 print('Bad line in %s: %s (expected %d columns, got %d)'
 74 |                         % (file, line.rstrip(), len(columns), len(fields)),
 75 |                         file=sys.stderr)
 76 |                 continue
 77 |             for i in range(0, len(names)):
 78 |                 try:
 79 |                     value = float(fields[i])
 80 |                 except ValueError:
 81 |                     value = fields[i]
 82 |                 columns[names[i]].append(value)
 83 |     f.close()
 84 |     file_data[file] = columns
 85 | 
 86 | def get_column(file, column):
 87 |     """
 88 |     Return a list containing the values of a given column in a given file.
 89 | 
 90 |     file:    Path name of the file containing the desired column.
 91 |     column:  Name of the column within that file.
 92 |     """
 93 | 
 94 |     __read_file(file)
 95 |     if not column in file_data[file]:
 96 |         raise Exception('Column %s doesn\'t exist in %s' % (column, name))
 97 |     return file_data[file][column]
 98 | 
 99 | def get_column_names(file):
100 |     """
101 |     Returns a list containing the names of all of the columns in file.
102 |     """
103 | 
104 |     __read_file(file)
105 |     return file_data[file].keys()
106 | 
def get_numbers(file):
    """
    Collects the first run of digits found in each column name of file
    (names without digits contribute nothing) and returns the distinct
    integer values in ascending order.

    file:    Path name of the file whose column names are scanned.
    """

    pattern = re.compile('[^0-9]*([0-9]+)')
    matches = map(pattern.match, get_column_names(file))
    found = {int(m.group(1)) for m in matches if m}
    return sorted(found)
119 | 
def max_value(file, columns):
    """
    Finds the largest value that appears in any of several columns of a
    file. Returns None if columns is empty.

    file:      Path name of the file containing the columns.
    columns:   A list of column names.
    """

    best = None
    for name in columns:
        candidate = max(get_column(file, name))
        if best is None:
            best = candidate
        elif candidate > best:
            best = candidate
    return best
133 | 
def node_name(file):
    """
    Derive a short node identifier (suitable e.g. for plot titles) from
    the name of a trace file: the file's stem with everything up to and
    including its last underscore removed.

    file:    Path name of the trace file.
    """
    stem = Path(file).stem
    # rpartition yields the whole stem when there is no underscore.
    return stem.rpartition('_')[2]
144 | 
def start_plot(max_x, max_y, title="", x_label="", y_label="", size=10,
       figsize=[6,4]):
    """
    Set up an empty pyplot graph (no data is plotted) and return its
    Axes object. The x-axis runs from 0 to max_x and the y-axis from
    1 to max_y.

    max_x:             Maximum x-coordinate
    max_y:             Maximum y-coordinate
    title:             Title for the plot; empty means no title
    x_label:           Label for x-axis; empty means no label
    y_label:           Label for y-axis; empty means no label
    size:              Size to use for fonts
    figsize:           Dimensions of plot
    """

    figure = plt.figure(figsize=figsize)
    axes = figure.add_subplot(111)
    if title != '':
        axes.set_title(title, size=size)
    axes.set_xlim(0, max_x)
    axes.set_ylim(1, max_y)
    for text, set_label in ((x_label, axes.set_xlabel),
            (y_label, axes.set_ylabel)):
        if text:
            set_label(text, size=size)
    return axes
171 | 
def plot_colors(file):
    """
    Writes a test plot with one labeled horizontal line for each of the
    standard colors defined above.

    file: Name of PDF file in which to write the plot.
    """

    ax = start_plot(200, 100, title='Standard Colors')

    # (y-coordinate, color, legend label) for each line, top to bottom.
    samples = [
        (65, color_green,    'color_green'),
        (60, color_blue,     'color_blue'),
        (55, color_red,      'color_red'),
        (50, tcp_color,      'tcp_color'),
        (45, tcp_color2,     'tcp_color2'),
        (40, tcp_color3,     'tcp_color3'),
        (35, homa_color,     'homa_color'),
        (30, homa_color2,    'homa_color2'),
        (25, homa_color3,    'homa_color3'),
        (20, dctcp_color,    'dctcp_color'),
        (15, dctcp_color2,   'dctcp_color2'),
        (10, dctcp_color3,   'dctcp_color3'),
        (5,  unloaded_color, 'unloaded_color'),
    ]
    for y, color, label in samples:
        ax.plot([0, 200], [y, y], color=color, label=label)
    ax.legend(loc='upper right', prop={'size': 9})
    plt.tight_layout()
    plt.savefig(file)


--------------------------------------------------------------------------------
/util/plot_tthoma.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2023 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | # This file provides a collection of functions that plot data generated
 7 | # by tthoma.py. Invoke with the --help option for more information.
 8 | 
 9 | from glob import glob
10 | from optparse import OptionParser
11 | import math
12 | import matplotlib
13 | import matplotlib.pyplot as plt
14 | import os
15 | from pathlib import Path
16 | import re
17 | import string
18 | import sys
19 | 
20 | import plot
21 | 
def backlog(data_file, plot_file):
    """
    Plots the per-core "BackN" columns of a backlog data file (produced by
    the "net" analyzer of tthoma.py) against its "Time" column. If the
    --cores option was given, only those cores are plotted.

    data_file:   Backlog data file generated by tthoma.py.
    plot_file:   Name of the file in which to output a plot.
    """

    cores = plot.get_numbers(data_file)
    if options.cores:
        cores = sorted(list(set(cores).intersection(options.cores)))
    columns = ['Back%d' % core for core in cores]
    core_names = ['C%02d' % core for core in cores]

    times = plot.get_column(data_file, 'Time')
    xmax = max(times)
    ymax = plot.max_value(data_file, columns)

    y_label = 'KB In Flight For %s Cores' % (plot.node_name(data_file))
    ax = plot.start_plot(xmax, ymax, x_label='Time', y_label=y_label)
    for column, label in zip(columns, core_names):
        ax.plot(times, plot.get_column(data_file, column),
                label=label, linewidth=0.8)
    ax.legend(loc='upper right', prop={'size': 9})
    plt.tight_layout()
    plt.savefig(plot_file)
52 | 
53 | 
def colors(plot_file):
    """
    Produces the test plot of the standard colors by delegating to
    plot.plot_colors.

    plot_file:   Name of the file in which to output a plot.
    """

    plot.plot_colors(plot_file)
62 | 
# Parse command-line options.
parser = OptionParser(description=
        'Reads data output by tthoma.py and generates a plot. func is '
        'the name of a function in this file, which will be invoked to '
        'generate a particular plot; args provide additional information to '
        'func if needed. Read the in-code documentation for the functions '
        'for details on what kinds of plots are available.',
        usage='%prog [options] func arg arg ...',
        conflict_handler='resolve')
parser.add_option('--cores', dest='cores', default=None,
        metavar='CORES', help='space-separated list of integer core numbers; '
        'plots will include data from these cores only, where appropriate')
(options, args) = parser.parse_args()

if options.cores is not None:
    # split() with no argument tolerates repeated or surrounding spaces.
    try:
        options.cores = [int(core) for core in options.cores.split()]
    except ValueError:
        parser.error('--cores must be a space-separated list of integers')

if len(args) < 1:
    print('No func was specified')
    parser.print_help()
    sys.exit(1)

# Only functions defined in this file may be invoked; other module-level
# names (imported modules, option objects, builtins) are rejected.
func = globals().get(args[0])
if not callable(func) or getattr(func, '__module__', None) != __name__:
    print('There is no function %s' % (args[0]))
    parser.print_help()
    sys.exit(1)

func(*args[1:])


--------------------------------------------------------------------------------
/util/receive_raw.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2022 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /* This is a test program that uses a raw socket to receive packets
 6 |  * on a given protocol and print their contents.
 7 |  *
 8 |  * Usage: receive_raw [protocol]
 9 |  */
10 | 
11 | #include <errno.h>
12 | #include <netdb.h>
13 | #include <stdio.h>
14 | #include <string.h>
15 | #include <stdlib.h>
16 | #include <unistd.h>
17 | #include <netinet/ip.h>
18 | #include <sys/types.h>
19 | #include <sys/socket.h>
20 | 
21 | #include "../homa.h"
22 | 
23 | int main(int argc, char** argv) {
24 | 	int fd;
25 | 	int protocol;
26 | 	ssize_t size;
27 | #define BUF_SIZE 2000
28 | 	char buffer[BUF_SIZE];
29 | 	struct ip* ip_header = (struct ip *) buffer;
30 | 	int header_length;
31 | 
32 | 	if (argc >= 2) {
33 | 		protocol = strtol(argv[1], NULL, 10);
34 | 		if (protocol == 0) {
35 | 			printf("Bad protocol number %s; must be integer\n",
36 | 					argv[3]);
37 | 			exit(1);
38 | 		}
39 | 	} else {
40 | 		protocol = IPPROTO_HOMA;
41 | 	}
42 | 
43 | 	fd = socket(AF_INET, SOCK_RAW, protocol);
44 | 	if (fd < 0) {
45 | 		printf("Couldn't open raw socket: %s\n", strerror(errno));
46 | 		exit(1);
47 | 	}
48 | 
49 | 	while (1) {
50 | 		size = recvfrom(fd, buffer, BUF_SIZE, 0,  NULL, 0);
51 | 		if (size < 0) {
52 | 			printf("Error receiving packet: %s\n", strerror(errno));
53 | 			exit(1);
54 | 		}
55 | 		header_length = 4 * ip_header->ip_hl;
56 | 		// printf("IP header length: %d bytes\n", header_length);
57 | 		buffer[size] = 0;
58 | 		printf("%s\n", buffer + header_length);
59 | 	}
60 | }
61 | 


--------------------------------------------------------------------------------
/util/scratch.c:
--------------------------------------------------------------------------------
 1 | // This is a scratch file used for writing temporary code to test
 2 | // how it works. It has no long term value.
 3 | 
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <unistd.h>
 7 | #include <arpa/inet.h>
 8 | #include <sys/types.h>
 9 | 
10 | #define print_type(name) printf("%s has type '%s'\n", #name, "__typeof__(name)");
11 | 
/* Current experiment: round a 64-bit value up to a multiple of 64 and
 * print both the value and the rounded result in hex.
 */
int main(int argc, char** argv) {
	/* Earlier experiment, kept for reference:
	 *	int value;
	 *	if (argc >= 2) {
	 *		sscanf(argv[1], "%x", &value);
	 *	} else {
	 *		value = 0x12345;
	 *	}
	 */

	const uint64_t original = 0x1234500001;
	const uint64_t rounded = (original + 63) & ~0x3f;

	printf("x: %lx, y: %lx\n", original, rounded);
	return 0;
}
25 | 
26 | 


--------------------------------------------------------------------------------
/util/send_many:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Repeatedly invoke homaSend (to see whether the system hangs because of
 3 | # sk_buff exhaustion).
 4 | 
 5 | count=1
 6 | while :
 7 | do
 8 |     ./homaSend rc71 "Test message #$count"
 9 |     echo "Sent message #$count"
10 |     count=$((count+1))
11 | done


--------------------------------------------------------------------------------
/util/send_raw.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2022 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /* This is a test program that will send a packet to a given
 6 |  * IP protocol, with given contents.
 7 |  *
 8 |  * Usage: send_raw hostName contents [protocol]
 9 |  */
10 | 
11 | #include <errno.h>
12 | #include <netdb.h>
13 | #include <stdio.h>
14 | #include <string.h>
15 | #include <stdlib.h>
16 | #include <unistd.h>
17 | #include <sys/types.h>
18 | #include <sys/socket.h>
19 | 
20 | #include "homa.h"
21 | #include "test_utils.h"
22 | 
/**
 * main() - Send a single packet with given contents to a given host,
 * using a raw socket for a given IP protocol.
 * @argc:   Number of command-line words.
 * @argv:   hostName contents [protocol]; protocol defaults to
 *          IPPROTO_HOMA.
 *
 * Exits with status 0 if the packet was sent, 1 after any error.
 */
int main(int argc, char** argv) {
	int fd, status;
	int exit_status = 0;
	struct addrinfo *result;
	struct addrinfo hints;
	char *message;
	char *host;
	int protocol;
	union sockaddr_in_union *addr;
	uint8_t *bytes;

	if (argc < 3) {
		printf("Usage: %s hostName contents [protocol]\n", argv[0]);
		exit(1);
	}
	host = argv[1];
	message = argv[2];
	if (argc >= 4) {
		char *end;
		protocol = strtol(argv[3], &end, 10);
		if ((protocol == 0) || (*end != 0)) {
			printf("Bad protocol number %s; must be integer\n",
					argv[3]);
			exit(1);
		}
	} else {
		protocol = IPPROTO_HOMA;
	}

	/* Look up the IPv4 address of the destination; the service name
	 * is only there because getaddrinfo wants one.
	 */
	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_family = AF_INET;
	hints.ai_socktype = SOCK_DGRAM;
	status = getaddrinfo(host, "80", &hints, &result);
	if (status != 0) {
		printf("Couldn't look up address for %s: %s\n",
				host, gai_strerror(status));
		exit(1);
	}
	addr = (union sockaddr_in_union*) result->ai_addr;
	bytes = (uint8_t *) &addr->in4.sin_addr;
	printf("Destination address: %x (%d.%d.%d.%d)\n", addr->in4.sin_addr.s_addr,
		bytes[0], bytes[1], bytes[2], bytes[3]);

	fd = socket(AF_INET, SOCK_RAW, protocol);
	if (fd < 0) {
		printf("Couldn't open raw socket: %s\n", strerror(errno));
		freeaddrinfo(result);
		exit(1);
	}

	status = sendto(fd, message, strlen(message), 0, result->ai_addr,
			result->ai_addrlen);
	if (status < 0) {
		printf("Error in sendto: %s\n", strerror(errno));

		/* Report the failure to the invoker as well. */
		exit_status = 1;
	} else {
		printf("Sendto succeeded\n");
	}
	close(fd);
	freeaddrinfo(result);
	exit(exit_status);
}
79 | 
80 | 


--------------------------------------------------------------------------------
/util/smi.cc:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2022 Homa Developers
  2 |  * SPDX-License-Identifier: BSD-1-Clause
  3 |  */
  4 | 
  5 | /* This program spawns a collection of threads on different cores to
  6 |  * detect SMI interrupts, during which all of the cores are simultaneously
  7 |  * paused. It outputs information about the frequency and length of the
  8 |  * SMIs.
  9 |  *
 10 |  * Usage:
 11 |  * smi core core ...
 12 |  */
 13 | 
 14 | #include <stdio.h>
 15 | #include <string.h>
 16 | #include <stdlib.h>
 17 | 
 18 | #include <thread>
 19 | #include <vector>
 20 | 
 21 | #include "test_utils.h"
 22 | 
 23 | #define usecs(x) (to_seconds(x)*1e06)
 24 | 
 25 | #define ms(x) (to_seconds(x)*1e03)
 26 | 
 27 | /**
 28 |  * Holds information about gaps for a single thread (periods of time when
 29 |  * that thread was not executing).
 30 |  */
 31 | #define MAX_GAPS 1000
 32 | struct thread_gaps {
 33 | 	/* Index in gaps of next gap to fill in. */
 34 | 	int next;
 35 | 
 36 | 	/* Used only wnen scanning: current gap being considered. */
 37 | 	int current;
 38 | 
 39 | 	struct {
 40 | 		uint64_t start;
 41 | 		uint64_t end;
 42 | 	} gaps[MAX_GAPS];
 43 | 
 44 | 	thread_gaps()
 45 | 		: next(0), current(0), gaps()
 46 | 	{}
 47 | };
 48 | 
 49 | /**
 50 |  * Used to collect information about identified gaps, in order to find
 51 |  * previous gaps of about the same duration.
 52 |  */
 53 | struct prev_gap {
 54 | 	/* Starting time for the gap. */
 55 | 	uint64_t start;
 56 | 
 57 | 	/* How long it lasted, in rdtsc units. */
 58 | 	uint64_t duration;
 59 | 
 60 | 	prev_gap(uint64_t start, uint64_t duration)
 61 | 			: start(start), duration(duration)
 62 | 	{}
 63 | };
 64 | 
 65 | /* Minimum length (in rdtsc cycles) for a gap to be considered meaningful. */
 66 | uint64_t min_gap_length;
 67 | 
 68 | /**
 69 |  * record_gaps() - Loop infinitely, recording info about execution gaps,
 70 |  * until gaps is full.
 71 |  * @gaps:    Structure to fill in with gap information.
 72 |  * @core:    Core on which to run.
 73 |  */
 74 | void record_gaps(struct thread_gaps *gaps, int core)
 75 | {
 76 | 	pin_thread(core);
 77 | //	printf("Pinned thread to core %d\n", core);
 78 | 	while (gaps->next < MAX_GAPS) {
 79 | 		uint64_t start, end;
 80 | 		start = rdtsc();
 81 | 		while (1) {
 82 | 			end = rdtsc();
 83 | 			if ((end - start) >= min_gap_length) {
 84 | 				break;
 85 | 			}
 86 | 			start = end;
 87 | 		}
 88 | 		gaps->gaps[gaps->next].start = start;
 89 | 		gaps->gaps[gaps->next].end = end;
 90 | 		gaps->next++;
 91 | 	}
 92 | }
 93 | 
 94 | int main(int argc, char** argv) {
 95 | 	std::vector<int> cores;
 96 | 	int i, num_cores;
 97 | 	uint64_t time0;
 98 | 
 99 | 	/* Minimum gap is 1 usec. */
100 | 	min_gap_length = static_cast<uint64_t>(get_cycles_per_sec())/1000000;
101 | 
102 | 	if ((argc == 2) && (strcmp(argv[1], "--help") == 0)) {
103 | 		printf("Usage: smi [core core ...]\n");
104 | 		printf("With no arguments, runs on a preset group of cores\n");
105 | 		exit(0);
106 | 	}
107 | 
108 | 	for (i = 1; i < argc; i++) {
109 | 		char *end;
110 | 		int core = strtol(argv[i], &end, 10);
111 | 		if ((*end != 0) || (core < 0)) {
112 | 			fprintf(stderr, "Bad core number %s: must be positive "
113 | 					"integer\n", argv[i]);
114 | 			exit(1);
115 | 		}
116 | 	}
117 | 	if (cores.empty()) {
118 | 		for (i = 0; i < 10; i++) {
119 | 			cores.push_back(i);
120 | 		}
121 | 	}
122 | 	num_cores = static_cast<int>(cores.size());
123 | 
124 | 	time0 = rdtsc();
125 | 	std::vector<struct thread_gaps *> thread_gaps;
126 | 	std::vector<std::thread> threads;
127 | 	for (int core: cores) {
128 | 		struct thread_gaps *g = new struct thread_gaps;
129 | 		thread_gaps.push_back(g);
130 | 		threads.emplace_back(record_gaps, g, core);
131 | 	}
132 | 	for (i = 0; i < num_cores; i++) {
133 | 		threads[i].join();
134 | 	}
135 | 	uint64_t overlap = rdtsc() - time0;
136 | 	printf("Each line gives the starting time for a gap, plus the elapsed\n");
137 | 	printf("time since the previous gap of a similar duration.\n");
138 | 
139 | 	/* Each iteration through this loop checks to see if the current
140 | 	 * gaps from all of the cores are concurrent. If so, it records
141 | 	 * that gap. Otherwise, it discards the oldest gap.
142 | 	 */
143 | 	uint64_t total_gaps = 0;
144 | 	int num_gaps = 0;
145 | 	std::vector<struct prev_gap> found;
146 | 	while (true) {
147 | 		int oldest = 0;
148 | 		uint64_t oldest_start = 0, latest_start = 0, earliest_end = 0;
149 | 		for (i = 0; i < num_cores; i++) {
150 | 			struct thread_gaps *gaps = thread_gaps[i];
151 | 			if (gaps->current >= MAX_GAPS) {
152 | 				goto done;
153 | 			}
154 | 			uint64_t start = gaps->gaps[gaps->current].start;
155 | 			uint64_t end = gaps->gaps[gaps->current].end;
156 | //			printf("Gap on core %d [%d]: %.1f .. %.1f\n", i,
157 | //					gaps->current, usecs(start - time0),
158 | //					usecs(end - time0));
159 | 			if (i == 0) {
160 | 				oldest = 0;
161 | 				oldest_start = start;
162 | 				latest_start = start;
163 | 				earliest_end = end;
164 | 			} else {
165 | 				if (start < oldest_start) {
166 | 					oldest = i;
167 | 					oldest_start = start;
168 | 				}
169 | 				if (start > latest_start) {
170 | 					latest_start = start;
171 | 				}
172 | 				if (end < earliest_end) {
173 | 					earliest_end = end;
174 | 				}
175 | 			}
176 | 		}
177 | 		uint64_t overlap = (earliest_end > latest_start)
178 | 				? earliest_end - latest_start : 0;
179 | //		printf("latest_start %.1f, earliest_end %.1f, overlap %.1f\n",
180 | //				usecs(latest_start - time0),
181 | //				usecs(earliest_end - time0),
182 | //				usecs(overlap));
183 | 		if (overlap >= min_gap_length ) {
184 | 			/* We have a consistent gap across all cores. */
185 | 			num_gaps++;
186 | 			total_gaps += overlap;
187 | 
188 | 			/* Find the most recent event of similar duration. */
189 | 			uint64_t prev_start = time0;
190 | 			for (int j = static_cast<int>(found.size())-1;
191 | 					j >= 0; j--) {
192 | //				printf("Checking found[%d]: start %.1f ms, duration %.1f us\n",
193 | //						j, ms(found[j].start - time0),
194 | //						usecs(found[j].duration));
195 | 				uint64_t prev = found[j].duration;
196 | 				uint64_t delta = prev;
197 | 				if (overlap < delta) {
198 | 					delta = overlap;
199 | 				}
200 | 				delta = delta/4;
201 | //				printf("prev %lu, overlap %lu, delta %lu\n",
202 | //						prev, overlap, delta);
203 | 				if (((prev + delta) >= overlap)
204 | 						&& ((overlap + delta) >= prev)) {
205 | 					prev_start = found[j].start;
206 | 					break;
207 | 				}
208 | 			}
209 | 			found.emplace_back(latest_start, overlap);
210 | 			printf("%5.1f ms [+%5.1f ms] gap of %.1f usec\n",
211 | 					ms(latest_start - time0),
212 | 					ms(latest_start - prev_start),
213 | 					usecs(overlap));
214 | 			for (i = 0; i < num_cores; i++) {
215 | 				thread_gaps[i]->current++;
216 | 			}
217 | 		} else {
218 | 			/* Nothing consistent; drop the oldest gap. */
219 | //			printf("Dropping gap %d of core %d\n",
220 | //					thread_gaps[oldest]->current, oldest);
221 | 			thread_gaps[oldest]->current++;
222 | 		}
223 | 	}
224 | 	done:
225 | 	printf("%d gaps (every %.1f ms), total gap time %.1f usec (%.2f%% of all time)\n",
226 | 			num_gaps, (usecs(overlap)/1000)/num_gaps,
227 | 			usecs(total_gaps),
228 | 			100.0*usecs(total_gaps)/usecs(overlap));
229 | 	exit(0);
230 | }
231 | 


--------------------------------------------------------------------------------
/util/smi.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | """
 4 | Scans a timetrace looking for long gaps where no cores have any events
 5 | (probably because of System Management Interrupts)
 6 | Usage: ttsmi.py [tt_file]
 7 | 
 8 | The existing timetrace is in tt_file (or stdin if tt_file is omitted).
 9 | """
10 | 
11 | from __future__ import division, print_function
12 | from glob import glob
13 | from optparse import OptionParser
14 | import math
15 | import os
16 | import re
17 | import string
18 | import sys
19 | 
# A line is reported if its timestamp exceeds that of the previous
# matching line by more than this amount (in the units of the trace's
# timestamps, which are labeled "us").
MIN_GAP = 150

# Stop after this many lines have been reported.
MAX_REPORTS = 5

if len(sys.argv) == 2:
    f = open(sys.argv[1])
elif len(sys.argv) == 1:
    f = sys.stdin
else:
    print("Usage: %s [tt_file]" % (sys.argv[0]))
    sys.exit(1)

prev_time = 0
printed = 0

for line in f:
    # Only lines with a timestamp and a core id are considered. The
    # pattern is a raw string so that '\[' reaches the regex engine
    # without being treated as an (invalid) string escape.
    match = re.match(r' *([-0-9.]+) us .* \[C([0-9]+)\]', line)
    if not match:
        continue
    time = float(match.group(1))

    if (time - prev_time) > MIN_GAP:
        print(line.rstrip())
        printed += 1
        if printed >= MAX_REPORTS:
            sys.exit(0)

    prev_time = time
45 |             


--------------------------------------------------------------------------------
/util/test_time_trace.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2022 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /* This program exercises the Linux kernel time trace mechanism
 6 |  * by calling a new system call that creates time traces.
 7 |  */
 8 | 
 9 | #include <errno.h>
10 | #include <stdio.h>
11 | #include <string.h>
12 | #include <stdlib.h>
13 | #include <unistd.h>
14 | #include <sys/types.h>
15 | #include <sys/socket.h>
16 | 
/**
 * main() - Invoke the test_timetrace system call 100 times, printing a
 * message for each invocation that fails.
 * @argc:   Unused.
 * @argv:   Unused.
 *
 * Return:  0 if every invocation succeeded, 1 otherwise.
 */
int main(int argc, char** argv) {
	int i;
	int failures = 0;

	printf("Invoking new 'test_timetrace' syscall.\n");
	for (i = 0; i < 100; i++) {
		/* 334 is the number under which the syscall is invoked. */
		int status = syscall(334);
		if (status < 0) {
			printf(" Error in test_timetrace: %s (%d)\n",
					strerror(errno), errno);
			failures++;
		}
	}
	return (failures == 0) ? 0 : 1;
}
29 | 
30 | 


--------------------------------------------------------------------------------
/util/test_utils.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2022 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | #ifndef _TEST_UTILS_H
 6 | #define _TEST_UTILS_H
 7 | 
 8 | #include <netinet/in.h>
 9 | 
10 | #ifdef __cplusplus
11 | #include <string>
12 | #include <vector>
13 | #endif
14 | 
15 | #include "homa.h"
16 | 
17 | #ifdef __cplusplus
18 | extern "C"
19 | {
20 | #endif
21 | 
/**
 * union sockaddr_in_union - Storage for a socket address that may be
 * either IPv4 or IPv6 (smaller and easier to use than sockaddr_storage).
 */
union sockaddr_in_union {
	/* Generic view of the address. */
	struct sockaddr sa;

	/* View of the address as IPv4. */
	struct sockaddr_in in4;

	/* View of the address as IPv6. */
	struct sockaddr_in6 in6;
};
31 | 
/**
 * sockaddr_size() - Return the size in bytes of a socket address:
 * that of an IPv4 address if its family is AF_INET, and that of an
 * IPv6 address otherwise.
 * @sa:     Pointer to either an IPv4 or an IPv6 address.
 */
static inline uint32_t sockaddr_size(const struct sockaddr *sa)
{
	if (sa->sa_family == AF_INET)
		return sizeof(struct sockaddr_in);
	return sizeof(struct sockaddr_in6);
}
41 | 
/* sizeof32() - Same as sizeof, except that the result is an int. There is
 * a separate definition for C because static_cast exists only in C++.
 */
#ifdef __cplusplus
#define sizeof32(type) static_cast<int>(sizeof(type))
#else
#define sizeof32(type) ((int) sizeof(type))
#endif

/* Utility functions that are implemented outside this header. */
extern int     check_buffer(void *buffer, size_t length);
extern int     check_message(struct homa_recvmsg_args *control,
	           char *region, size_t length, int skip);
extern double  get_cycles_per_sec(void);
extern int     get_int(const char *s, const char *msg);
extern void    pin_thread(int core);
extern const char*
               print_address(const union sockaddr_in_union *addr);
extern void    print_dist(uint64_t times[], int count);
extern void    seed_buffer(void *buffer, size_t length, int seed);
#ifdef __cplusplus
extern void    split(const char *s, char sep, std::vector<std::string> &dest);
#endif
extern double  to_seconds(uint64_t cycles);
58 | 
/**
 * rdtsc(): read the fine-grain CPU cycle counter with the RDTSC
 * instruction and return it as a single 64-bit value.
 */
inline static uint64_t rdtsc(void)
{
	uint32_t low_bits, high_bits;
	uint64_t cycles;

	/* The instruction delivers the counter as two 32-bit halves. */
	__asm__ __volatile__("rdtsc" : "=a" (low_bits), "=d" (high_bits));

	cycles = high_bits;
	cycles <<= 32;
	cycles |= low_bits;
	return cycles;
}
69 | 
70 | #ifdef __cplusplus
71 | }
72 | #endif
73 | 
74 | #endif /* _TEST_UTILS_H */
75 | 


--------------------------------------------------------------------------------
/util/time_trace.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) 2020-2022 Homa Developers
  2 |  * SPDX-License-Identifier: BSD-1-Clause
  3 |  */
  4 | 
  5 | #ifndef TIMETRACE_H
  6 | #define TIMETRACE_H
  7 | 
  8 | #include <string>
  9 | #include <vector>
 10 | 
 11 | #include "test_utils.h"
 12 | 
 13 | // Change 1 -> 0 in the following line to disable time tracing globally.
 14 | #define ENABLE_TIME_TRACE 1
 15 | 
/**
 * class time_trace - Implements a circular buffer of entries, each of which
 * consists of a fine-grain timestamp, a short descriptive string, and
 * a few additional values. It's typically used to record times at
 * various points in an operation, in order to find performance bottlenecks.
 * It can record a trace relatively efficiently (< 10ns as of 7/2020),
 * and then return the trace as a string (get_trace) or write it to a
 * file (print_to_file).
 *
 * This class is thread-safe. By default, trace information is recorded
 * separately for each thread in order to avoid synchronization and cache
 * consistency overheads; the thread-local traces are merged when the
 * timetrace is printed, so the existence of multiple trace buffers is
 * normally invisible.
 *
 * The time_trace class should never be constructed (its constructor is
 * protected); it offers only static methods.
 *
 * If you want to use a single trace buffer rather than per-thread
 * buffers, see the nested class time_trace::buffer below.
 */
class time_trace {
    public:
	static void cleanup();
	static void freeze();
	static std::string get_trace();
	static int print_to_file(const char *name);

	/** @frozen: nonzero means that the timetrace is already frozen. */
	static int frozen;

    protected:
	class buffer;

	/**
	 * @tb: points to a private per-thread time_trace::buffer;
	 * NULL means no such object exists for the current thread.
	 */
	static __thread buffer* tb;

	/**
	 * @thread_buffers: holds pointers to all of the existing thread-private
	 * buffers. Entries get deleted only by free_unused.
	 * NOTE(review): no method named free_unused is declared in this
	 * class; this presumably refers to cleanup() -- confirm.
	 */
	static std::vector<buffer*> thread_buffers;

    public:

	/**
	 * record() - Record an event in a thread-local buffer.
	 * @timestamp: The time at which the event occurred.
	 * @format:    A format string for snprintf that will be used, along
	 *             with arg0..arg3, to generate a human-readable message
	 *             describing what happened, when the time trace is printed.
	 *             The message is generated by calling snprintf as follows:
	 *             snprintf(buffer, size, format, arg0, arg1, arg2, arg3)
	 *             where format and arg0..arg3 are the corresponding
	 *             arguments to this method. This pointer is stored in the
	 *             time trace, so the caller must ensure that its contents
	 *             will not change over its lifetime in the trace.
	 * @arg0:      Argument to use when printing a message about this event.
	 * @arg1:      Argument to use when printing a message about this event.
	 * @arg2:      Argument to use when printing a message about this event.
	 * @arg3:      Argument to use when printing a message about this event.
	 *
	 * Note: tb is dereferenced without a NULL check, so the calling
	 * thread must already have a buffer (see thread_buffer below).
	 * Compiles to nothing when ENABLE_TIME_TRACE is 0.
	 */
	static inline void record(uint64_t timestamp, const char* format,
			uint32_t arg0 = 0, uint32_t arg1 = 0,
			uint32_t arg2 = 0, uint32_t arg3 = 0) {
#if ENABLE_TIME_TRACE
		tb->record(timestamp, format, arg0, arg1, arg2, arg3);
#endif
	}

	/**
	 * record() - Same as the method above, except that the event is
	 * timestamped with the current value of rdtsc().
	 */
	static inline void record(const char* format, uint32_t arg0 = 0,
			uint32_t arg1 = 0, uint32_t arg2 = 0, uint32_t arg3 = 0) {
#if ENABLE_TIME_TRACE
		record(rdtsc(), format, arg0, arg1, arg2, arg3);
#endif
	}

    protected:
	time_trace();
	static void print_internal(std::string* s, FILE *f);

	/** struct event - Holds one entry in a time_trace::buffer. */
	struct event {
		/* See documentation for record method. */
		uint64_t timestamp;
		const char* format;
		uint32_t arg0;
		uint32_t arg1;
		uint32_t arg2;
		uint32_t arg3;
	};

	/**
	 * class buffer - Represents a sequence of events generated by a single
	 * thread.  Has a fixed capacity, so slots are re-used on a circular
	 * basis.  This class is not thread-safe.
	 */
	class buffer {
	    public:
		buffer(std::string name);
		~buffer();
		void record(uint64_t timestamp, const char* format,
				uint32_t arg0 = 0, uint32_t arg1 = 0,
				uint32_t arg2 = 0, uint32_t arg3 = 0);
		void reset();

	    public:
		/** @name: name that identifies this buffer/thread. */
		std::string name;

		/**
		 * @BUFFER_SIZE_EXP: determines the number of events we can
		 * retain, as an exponent of 2.
		 */
		static const uint8_t BUFFER_SIZE_EXP = 16;

		/**
		 * @BUFFER_SIZE: total number of events that we can retain
		 * at any given time (2^BUFFER_SIZE_EXP).
		 */
		static const uint32_t BUFFER_SIZE = 1 << BUFFER_SIZE_EXP;

		/**
		 * @BUFFER_MASK: bit mask used to implement a circular event buffer.
		 */
		static const uint32_t BUFFER_MASK = BUFFER_SIZE - 1;

		/**
		 * @next_index: index within events of the slot to use for the next
		 * call to record.
		 */
		int next_index;

		/**
		 * @ref_count: number of thread_buffer objects that reference
		 * this buffer. When this count becomes 0, the buffer can be
		 * deleted in the next call to time_trace::cleanup.
		 */
		int ref_count;

		/**
		 * @events: Holds information from the most recent calls to record.
		 */
		time_trace::event events[BUFFER_SIZE];

		friend class time_trace;
	};

    public:
	/**
	 * class thread_buffer - One of these should be instantiated as a
	 * local variable in the top-level function for each thread that
	 * invokes tt. Such a variable ensures that a buffer is available
	 * for the lifetime of that thread.
	 */
	class thread_buffer {
	    public:
		thread_buffer(std::string name);
		~thread_buffer();

	    protected:
		/* The buffer associated with this thread. Malloc-ed. The
		 * "official" reference to this is the one in thread_buffers.
		 */
		time_trace::buffer *buffer;
	};
};
185 | 
186 | #define tt time_trace::record
187 | 
188 | #endif // TIMETRACE_H
189 | 
190 | 


--------------------------------------------------------------------------------
/util/tput.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | """
  4 | Analyzes throughput of message arrivals in a timetrace.
  5 | Usage: tput.py [--verbose] [tt_file]
  6 | 
The existing timetrace is in tt_file (or stdin if tt_file is omitted).
  8 | """
  9 | 
 10 | from __future__ import division, print_function
 11 | from glob import glob
 12 | from optparse import OptionParser
 13 | import math
 14 | import os
 15 | import re
 16 | import string
 17 | import sys
 18 | 
 19 | verbose = False
 20 | if (len(sys.argv) >= 2) and (sys.argv[1] == "--verbose"):
 21 |   verbose = True
 22 |   sys.argv.pop(1)
 23 | if len(sys.argv) == 2:
 24 |     f = open(sys.argv[1])
 25 | elif len(sys.argv) == 1:
 26 |     f = sys.stdin
 27 | else:
 28 |     print("Usage: %s [tt_file]" % (sys.argv[0]))
 29 |     sys.exit(1)
 30 | 
 31 | # Keys are RPC ids, values are dictionaries containing the following fields:
 32 | # start: time offset 0 received
 33 | # grant: time the first grant was sent
 34 | # grant_offset: offset in last data packet after first grant
 35 | # end: time last packet received
 36 | # offset: highest offset in any packet received for the RPC
 37 | rpcs = {}
 38 | 
 39 | for line in f:
 40 |     match = re.match(' *([-0-9.]+) us .* \[C([0-9]+)\]', line)
 41 |     if not match:
 42 |         continue
 43 |     time = float(match.group(1))
 44 |     core = match.group(2)
 45 | 
 46 |     match = re.match('.*sending grant for id ([0-9]+)',
 47 |         line)
 48 |     if match:
 49 |       id = match.group(1)
 50 |       if id in rpcs and not 'grant' in rpcs[id]:
 51 |         rpcs[id]['grant'] = time
 52 |         rpcs[id]['grant_offset'] = rpcs[id]['offset']
 53 | 
 54 |     match = re.match('.*homa_gro_receive got packet .* id ([0-9]+), '
 55 |         'offset ([0-9]+)', line)
 56 |     if match:
 57 |         id = match.group(1)
 58 |         offset = int(match.group(2))
 59 |         if (not id in rpcs) and (offset == 0):
 60 |           rpcs[id] = {'offset': 0, 'start': time}
 61 |         if id in rpcs:
 62 |           rpcs[id]['end'] = time
 63 |           if offset > rpcs[id]['offset']:
 64 |               rpcs[id]['offset'] = offset
 65 | 
 66 |     match = re.match('.*incoming data packet, id ([0-9]+), .* offset '
 67 |         '([0-9]+)/([0-9]+)', line)
 68 |     if match:
 69 |       id = match.group(1)
 70 |       length = int(match.group(3))
 71 |       if id in rpcs:
 72 |         rpcs[id]['length'] = length
 73 | 
 74 | total_bytes = 0
 75 | total_bytes2 = 0
 76 | total_time = 0
 77 | total_time2 = 0
 78 | tputs = []
 79 | tputs2 = []
 80 | for id in sorted(rpcs.keys()):
 81 |     rpc = rpcs[id]
 82 |     if (not 'start' in rpc) or (not 'end' in rpc):
 83 |         continue
 84 |     if rpc['offset'] < 300000:
 85 |         continue
 86 |     bytes = rpc['offset']
 87 |     time = rpc['end'] - rpc['start']
 88 |     tput = bytes*8.0/time/1000
 89 |     tputs.append(tput)
 90 |     total_bytes += bytes
 91 |     total_time += time
 92 |  
 93 |     # Compute separate statistics for throughput after sending the first
 94 |     # grant (this eliminates time waiting for the message to become highest
 95 |     # priority)
 96 |     if 'grant' in rpc:
 97 |       bytes2 = rpc['offset'] - rpc['grant_offset']
 98 |       time2 = rpc['end'] - rpc['grant']
 99 |       tput2 = bytes2*8.0/time2/1000
100 |       tputs2.append(tput2)
101 |       total_bytes2 += bytes2
102 |       total_time2 += time2
103 | 
104 |       if verbose:
105 |         print("%9.3f: id %s, grant at %9.3f, offset grant_offset %d, "
106 |             "last_offset %d at %9.3f, tput %.1f, tput2 %.1f" % (
107 |             rpc['start'], id, rpc['grant'], rpc['grant_offset'], rpc['offset'],
108 |             rpc['end'], tput, tput2))
109 | 
110 | tputs.sort()
111 | if verbose:
112 |   print("")
113 | print("Messages >= 300KB: %d" % (len(tputs)))
114 | print("Entire messages:")
115 | print("Minimum tput: %4.1f Gbps" % (tputs[0]))
116 | print("Median tput:  %4.1f Gbps" % (tputs[len(tputs)//2]))
117 | print("P90 tput:     %4.1f Gbps" % (tputs[len(tputs)*9//10]))
118 | print("P99 tput:     %4.1f Gbps" % (tputs[len(tputs)*99//100]))
119 | print("Maximum tput: %4.1f Gbps" % (tputs[-1]))
120 | print("Average tput: %4.1f Gbps" % (total_bytes*8.0/total_time/1000))
121 | 
122 | tputs2.sort()
123 | print("\nMessage data after first grant:")
124 | print("Minimum tput: %4.1f Gbps" % (tputs2[0]))
125 | print("Median tput:  %4.1f Gbps" % (tputs2[len(tputs2)//2]))
126 | print("P90 tput:     %4.1f Gbps" % (tputs2[len(tputs2)*9//10]))
127 | print("P99 tput:     %4.1f Gbps" % (tputs2[len(tputs2)*99//100]))
128 | print("Maximum tput: %4.1f Gbps" % (tputs2[-1]))
129 | print("Average tput: %4.1f Gbps" % (total_bytes2*8.0/total_time2/1000))
130 | 


--------------------------------------------------------------------------------
/util/ttgrep.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2019-2022 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | """
 7 | Scan the time trace data in a log file; find all records containing
 8 | a given string, and output only those records. If the --rebase argument
 9 | is present, times are offset so the first event is at time 0. If the file
10 | is omitted, standard input is used.
11 | Usage: ttgrep.py [--rebase] string [file]
12 | """
13 | 
14 | from __future__ import division, print_function
15 | from glob import glob
16 | from optparse import OptionParser
17 | import math
18 | import os
19 | import re
20 | import string
21 | import sys
22 | 
# True means times are offset so that the first output event is at time 0
# (set by the --rebase command-line option).
rebase = False

def scan(f, string):
    """
    Scan the log file given by 'f' (handle for an open file, or any
    iterable of lines) and output all time trace records containing
    string. Records containing "Freez" are always output as well. The
    interval printed with each record is the time since the previous
    record that was output (0 for the first one). If the global 'rebase'
    is true, times are printed relative to the first record output.
    """
    # None means no record has been output yet. (A sentinel of 0.0 would
    # be wrong here, because 0.0 is a legitimate event time.)
    start_time = None
    prev_time = None
    for line in f:
        match = re.match(r' *([-0-9.]+) us \(\+ *([0-9.]+) us\) (.*)',
                line)
        if not match:
            continue
        time = float(match.group(1))
        event = match.group(3)
        if (string not in event) and ("Freez" not in event):
            continue
        if start_time is None:
            start_time = time
            prev_time = time
        if rebase:
            print_time = time - start_time
        else:
            print_time = time
        print("%9.3f us (+%8.3f us) %s" % (print_time,
                time - prev_time, event))
        prev_time = time

if __name__ == "__main__":
    if (len(sys.argv) > 1) and (sys.argv[1] == "--rebase"):
        rebase = True
        del sys.argv[1]

    f = sys.stdin
    if len(sys.argv) == 3:
        f = open(sys.argv[2])
    elif len(sys.argv) != 2:
        print("Usage: %s [--rebase] string [logFile]" % (sys.argv[0]))
        sys.exit(1)

    scan(f, sys.argv[1])


--------------------------------------------------------------------------------
/util/ttmerge.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | # Copyright (c) 2019-2022 Homa Developers
  4 | # SPDX-License-Identifier: BSD-1-Clause
  5 | 
  6 | """
  7 | Merge two or more timetraces into a single trace. All of the traces
  8 | must use the same time source.
  9 | Usage: ttmerge.py file file file ...
 10 | """
 11 | 
 12 | from __future__ import division, print_function
 13 | from glob import glob
 14 | import math
 15 | from optparse import OptionParser
 16 | import os
 17 | import re
 18 | import string
 19 | import sys
 20 | 
 21 | # Each entry in the following list describes one file; it is a dictionary
 22 | # with the following fields:
 23 | # name:      Name of the file
 24 | # f:         Open file for reading
 25 | # ghz:       Clock rate assumed for this file
 26 | # first:     Timestamp of first entry
 27 | # offset:    How much to add to times in this file so they align
 28 | #            with times in the other files
 29 | # time:      Time of the current line, adjusted by offset
 30 | # suffix:    Everything on the current line after the times
 31 | files = []
 32 | 
 33 | # Earliest first timestamp from all the files.
 34 | first = 0
 35 | 
 36 | # Reference ghz (taken from input file with the earliest start time;
 37 | # used for output). Used to compensate for the fact that different
 38 | # traces may have assumed slightly different conversion rates from
 39 | # ticks to microseconds.
 40 | ghz = 0.0
 41 | 
 42 | def next_line(info):
 43 |     """
 44 |     Read information from a file. The info argument is one of the
 45 |     entries in files.
 46 |     """
 47 |     while True:
 48 |         line = info["f"].readline()
 49 |         if not line:
 50 |             info["f"].close()
 51 |             info["f"] = None
 52 |             return
 53 |         match = re.match(' *([0-9.]+) us \(\+ *([0-9.]+) us\) (.*)', line)
 54 |         if not match:
 55 |             continue
 56 |         info["time"] = (float(match.group(1)) * ghz / info["ghz"]) + info["offset"]
 57 |         info["suffix"] = match.group(3).rstrip()
 58 |         return
 59 | 
 60 | # Open each of the files and initialize information for them.
 61 | for file in sys.argv[1:]:
 62 |     f = open(file, newline='\n')
 63 |     line = f.readline()
 64 |     if not line:
 65 |         continue
 66 |     info = {"f": f}
 67 |     match = re.match(' *([0-9.]+) us \(\+ *([0-9.]+) us\) .* '
 68 |             'First event has timestamp ([0-9]+) '
 69 |             '\(cpu_ghz ([0-9.]+)\)', line)
 70 |     if not match:
 71 |         continue
 72 |     info = {"name": file,
 73 |             "f": f,
 74 |             "ghz": float(match.group(4)),
 75 |             "first": int(match.group(3)),
 76 |             "offset": 0.0}
 77 |     files.append(info)
 78 | 
 79 | # Find the earliest timestamp and set offsets.
 80 | for info in files:
 81 |     if (first == 0) or info["first"] < first:
 82 |         first = info["first"]
 83 |         ghz = info["ghz"]
 84 | for info in files:
 85 |     info["offset"] = ((info["first"] - first)/ghz)/1000.0
 86 |     # print("file %s has offset %.2f us (difference: %d)" % (info["name"],
 87 |     #         info["offset"], info["first"] - first))
 88 | 
 89 |     # Prime the info with the first real trace entry.
 90 |     next_line(info)
 91 | 
 92 | # Repeatedly output the earliest line until there are no lines left to output.
 93 | prevTime = 0.0
 94 | while True:
 95 |     best = None
 96 |     best_time = 0.0
 97 |     for info in files:
 98 |         if info["f"] and ((best_time == 0.0) or (info["time"] < best_time)):
 99 |             best_time = info["time"]
100 |             best = info
101 |     if not best:
102 |         break
103 |     time = best["time"]
104 |     print("%9.3f us (+%8.3f us) %s" % (time, time - prevTime, best["suffix"]))
105 |     prev_time = time
106 |     next_line(best)


--------------------------------------------------------------------------------
/util/ttoffset.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2019-2022 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | """
 7 | Rewrite a time trace with all of the times offset by a fixed amount
 8 | (typically used to align the times in two timetraces)
 9 | Usage: ttoffset.py old_time new_time [tt_file]
10 | 
The existing timetrace is in tt_file (or stdin if tt_file is omitted); a new
12 | timetrace will be written to standard output, with (new_time - old_time)
13 | added to each timestamp.
14 | """
15 | 
16 | from __future__ import division, print_function
17 | from glob import glob
18 | from optparse import OptionParser
19 | import math
20 | import os
21 | import re
22 | import string
23 | import sys
24 | 
def offset_trace(f, delta):
    """
    Copy the timetrace in 'f' (an open file or any iterable of lines) to
    standard output, adding 'delta' (microseconds) to the time at the
    start of each record. Lines that don't start with a time are copied
    unchanged. Negative times are accepted, since a previous offset may
    have produced them.
    """
    for line in f:
        match = re.match(r' *(-?[0-9.]+) us (.*)', line)
        if not match:
            # The line already ends with a newline; don't emit a second.
            print(line.rstrip('\n'))
            continue
        time = float(match.group(1))
        print("%9.3f us %s" % (time + delta, match.group(2)))

if __name__ == "__main__":
    if len(sys.argv) == 4:
        f = open(sys.argv[3])
    elif len(sys.argv) == 3:
        f = sys.stdin
    else:
        print("Usage: %s old_time new_time [tt_file]" % (sys.argv[0]))
        sys.exit(1)

    offset_trace(f, float(sys.argv[2]) - float(sys.argv[1]))


--------------------------------------------------------------------------------
/util/ttprint.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2019-2022 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | """
 7 | This program reads timetrace information from /proc/timetrace (or from
 8 | the first argument, if given) and prints it out in a different form,
 9 | with times in microseconds instead of clock cycles.
10 | """
11 | 
12 | from __future__ import division, print_function
13 | from glob import glob
14 | from optparse import OptionParser
15 | import math
16 | import os
17 | import re
18 | import string
19 | import sys
20 | 
def print_trace(f):
    """
    Read raw timetrace data from 'f' (an open file) and print it with
    times in microseconds. The first line of 'f' must give the clock rate
    in the form "cpu_khz: <number>"; each following line consists of a
    timestamp in clock cycles followed by the event text (lines of any
    other form are ignored). Each output line gives the time since the
    first event and the time since the previous event.

    Returns the exit status for the program: 0 normally (including an
    empty file), 1 if the first line doesn't contain the clock rate.
    """
    # Read initial line containing clock rate.
    line = f.readline()
    if not line:
        print('File empty!')
        return 0
    match = re.match('cpu_khz: ([0-9.]+)', line)
    if not match:
        print('Initial line doesn\'t contain clock rate:\n%s' % (line))
        return 1

    # Clock cycles per nanosecond.
    cpu_ghz = float(match.group(1))*1e-06

    # Time in cycles of first event (None means no event seen yet).
    first_time = None

    # Time in cycles of previous event.
    prev_time = None

    for line in f:
        match = re.match('([0-9.]+) (.+)', line)
        if not match:
            continue
        this_time = float(match.group(1))
        this_event = match.group(2)
        if first_time is None:
            first_time = this_time
            prev_time = this_time
            print('%9.3f us (+%8.3f us) [C00] First event has timestamp %s '
                    '(cpu_ghz %.15f)' % (0, 0, match.group(1), cpu_ghz))
        print('%9.3f us (+%8.3f us) %s' % (
                (this_time - first_time)/(1000.0 * cpu_ghz),
                (this_time - prev_time)/(1000.0 * cpu_ghz), this_event))
        prev_time = this_time
    return 0

if __name__ == "__main__":
    file_name = "/proc/timetrace"
    if len(sys.argv) > 1:
        file_name = sys.argv[1]
    with open(file_name) as f:
        status = print_trace(f)
    sys.exit(status)


--------------------------------------------------------------------------------
/util/ttrange.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2019-2022 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | """
Extract the entries from a timetrace that fall within a particular time range.
 8 | Usage: ttrange.py start_time end_time [tt_file]
 9 | 
The existing timetrace is in tt_file (or stdin if tt_file is omitted); a new
11 | timetrace will be written to standard output containing all entries whose
12 | timestamps fall between start_time and end_time, inclusive.
13 | """
14 | 
15 | from __future__ import division, print_function
16 | from glob import glob
17 | from optparse import OptionParser
18 | import math
19 | import os
20 | import re
21 | import string
22 | import sys
23 | 
def print_range(f, start_time, end_time):
    """
    Print each record of the timetrace in 'f' (an open file or any
    iterable of lines) whose time is between start_time and end_time,
    inclusive. Lines that don't start with a time are dropped. Negative
    times are recognized, so that traces whose times have been offset
    below zero can be processed.
    """
    for line in f:
        match = re.match(r' *(-?[0-9.]+) us (.*)', line)
        if not match:
            continue
        time = float(match.group(1))
        if (time >= start_time) and (time <= end_time):
            print(line.rstrip('\n'))

if __name__ == "__main__":
    if len(sys.argv) == 4:
        f = open(sys.argv[3])
    elif len(sys.argv) == 3:
        f = sys.stdin
    else:
        print("Usage: %s start_time end_time [tt_file]" % (sys.argv[0]))
        sys.exit(1)

    print_range(f, float(sys.argv[1]), float(sys.argv[2]))


--------------------------------------------------------------------------------
/util/ttsyslog.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2019-2022 Homa Developers
 4 | # SPDX-License-Identifier: BSD-1-Clause
 5 | 
 6 | """
 7 | This program reads timetrace information that was printk-ed to the
 8 | system log, removing extraneous syslog information and printing it
 9 | out with times in microseconds instead of clock cycles.
10 | 
11 | Usage:
12 | ttsyslog.py [file]
13 | 
14 | If no file is given, the information is read from standard input.
15 | """
16 | 
17 | from __future__ import division, print_function
18 | from glob import glob
19 | from optparse import OptionParser
20 | import math
21 | import os
22 | import re
23 | import string
24 | import sys
25 | 
def convert_syslog(f):
    """
    Read timetrace records that were written to the system log from 'f'
    (an open file or any iterable of lines) and print them with times in
    microseconds. Everything up to and including the first line that
    contains "cpu_khz: <number>" is ignored; after that, only lines that
    contain a timestamp (in clock cycles) followed by "[Cxx] event" are
    used, and any text preceding the timestamp is discarded.
    """
    # Clock cycles per nanosecond (None means not yet known).
    cpu_ghz = None

    # Time in cycles of first event (None means no event seen yet).
    first_time = None

    # Time in cycles of previous event.
    prev_time = None

    for line in f:
        # Ignore everything up until the initial line containing the
        # clock speed.
        if cpu_ghz is None:
            match = re.match(r'.*cpu_khz: ([0-9.]+)', line)
            if match:
                cpu_ghz = float(match.group(1))*1e-06
            continue

        match = re.match(r'.* ([0-9.]+) (\[C..\] .+)', line)
        if not match:
            continue
        this_time = float(match.group(1))
        this_event = match.group(2)
        if first_time is None:
            first_time = this_time
            prev_time = this_time
            print('%9.3f us (+%8.3f us) [C00] First event has timestamp %s '
                    '(cpu_ghz %.15f)' % (0, 0, match.group(1), cpu_ghz))
        print('%9.3f us (+%8.3f us) %s' % (
                (this_time - first_time)/(1000.0 * cpu_ghz),
                (this_time - prev_time)/(1000.0 * cpu_ghz), this_event))
        prev_time = this_time

if __name__ == "__main__":
    f = sys.stdin
    if len(sys.argv) > 1:
        f = open(sys.argv[1])
    convert_syslog(f)


--------------------------------------------------------------------------------
/util/use_memory.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019-2022 Homa Developers
 2 |  * SPDX-License-Identifier: BSD-1-Clause
 3 |  */
 4 | 
 5 | /* This program allocates a given amount of memory and then sleeps
 6 |  * forever. It is intended to create memory pressure in order to see
 7 |  * how other parts of the system react when memory runs low.
 8 |  *
 9 |  * Usage:
10 |  * use_memory gbytes
11 |  */
12 | 
13 | #include <stdio.h>
14 | #include <stdlib.h>
15 | #include <unistd.h>
16 | 
/* Number of ints in each block allocated by main. */
#define INTS_PER_GIG 256000000

/**
 * main() - Allocate the number of gigabytes of memory given by the single
 * command-line argument, write every word of it, then sleep forever
 * (the memory is intentionally never freed).
 * @argc:   Number of entries in @argv; must be 2.
 * @argv:   argv[1] is the number of gbytes to allocate (a positive
 *          decimal integer).
 *
 * Return:  Never returns normally; exits with status 1 if the arguments
 *          are bad or memory cannot be allocated.
 */
int main(int argc, char** argv) {
	long gbytes, i;
	int j;

	if (argc != 2) {
		printf("Usage: %s gbytes\n", argv[0]);
		exit(1);
	}

	/* strtol returns 0 if argv[1] doesn't start with a number. Negative
	 * values are rejected too: they would allocate nothing and then
	 * sleep forever.
	 */
	gbytes = strtol(argv[1], NULL, 10);
	if (gbytes <= 0) {
		printf("Bad value %s; must be integer # of gbytes to allocate\n",
				argv[1]);
		exit(1);
	}

	// Each iteration through the following loop allocates INTS_PER_GIG
	// ints (1.024 * 10^9 bytes with 4-byte ints) and fills them with
	// random values, so that every page gets written.
	for (i = 0; i < gbytes; i++) {
		int *block;
		block = (int *) malloc(INTS_PER_GIG*sizeof(int));
		if (block == NULL) {
			printf("Malloc returned NULL.\n");
			exit(1);
		}
		for (j = 0; j < INTS_PER_GIG; j++) {
			block[j] = random();
		}
		printf("Memory allocated: %ld gbytes\n", i+1);
	}
	while (1) {
		sleep(1000);
	}
	return 0;
}
51 | 
52 | 


--------------------------------------------------------------------------------