├── .gitignore ├── Makefile ├── README.md ├── balance.txt ├── cloudlab ├── README.md ├── bash_profile ├── bashrc ├── bin │ ├── ckill │ ├── config │ ├── install_homa │ ├── on_nodes │ ├── set_cutoffs │ ├── switch.py │ └── update_linux ├── config_switch ├── gdbinit └── update ├── dissector ├── CMakeLists.txt ├── README.md └── homa.c ├── homa.h ├── homa_api.c ├── homa_grant.c ├── homa_grant.h ├── homa_impl.h ├── homa_incoming.c ├── homa_metrics.c ├── homa_metrics.h ├── homa_offload.c ├── homa_offload.h ├── homa_outgoing.c ├── homa_peer.c ├── homa_peer.h ├── homa_plumbing.c ├── homa_pool.c ├── homa_pool.h ├── homa_receiver.cc ├── homa_receiver.h ├── homa_rpc.c ├── homa_rpc.h ├── homa_skb.c ├── homa_skb.h ├── homa_sock.c ├── homa_sock.h ├── homa_stub.h ├── homa_timer.c ├── homa_utils.c ├── homa_wire.h ├── man ├── Makefile ├── homa.7 ├── homa_abort.3 ├── homa_reply.3 ├── homa_send.3 ├── recvmsg.2 └── sendmsg.2 ├── notes.txt ├── perf.txt ├── perf ├── README.md ├── plot_length_cdf.py └── rtt.xlsx ├── protocol.md ├── reap.txt ├── rsync-exclude.txt ├── sync.txt ├── test ├── Makefile ├── README.md ├── ccutils.cc ├── ccutils.h ├── kselftest_harness.h ├── main.c ├── mergedep.pl ├── mock.c ├── mock.h ├── unit_homa_grant.c ├── unit_homa_incoming.c ├── unit_homa_metrics.c ├── unit_homa_offload.c ├── unit_homa_outgoing.c ├── unit_homa_peer.c ├── unit_homa_plumbing.c ├── unit_homa_pool.c ├── unit_homa_rpc.c ├── unit_homa_skb.c ├── unit_homa_sock.c ├── unit_homa_timer.c ├── unit_homa_utils.c ├── unit_timetrace.c ├── utils.c └── utils.h ├── timetrace.c ├── timetrace.h └── util ├── Makefile ├── README.md ├── avg.py ├── buffer_client.c ├── buffer_server.c ├── cp_basic ├── cp_both ├── cp_buffers ├── cp_client_threads ├── cp_config ├── cp_config_buf ├── cp_load ├── cp_mtu ├── cp_node.cc ├── cp_server_ports ├── cp_tcp ├── cp_tcp_config ├── cp_vs_tcp ├── cperf.py ├── diff_metrics.py ├── diff_rtts.py ├── dist.cc ├── dist.h ├── dist_test.cc ├── dist_to_proto.cc ├── get_time_trace.c ├── 
get_traces ├── homa_prio.cc ├── homa_test.cc ├── inc_tput.cc ├── metrics.py ├── plot.py ├── plot_tthoma.py ├── receive_raw.c ├── rpcid.py ├── scratch.c ├── send_many ├── send_raw.c ├── server.cc ├── service.py ├── smi.cc ├── smi.py ├── strip.py ├── test_time_trace.c ├── test_utils.cc ├── test_utils.h ├── time_trace.cc ├── time_trace.h ├── tput.py ├── ttgrep.py ├── tthoma.py ├── ttmerge.py ├── ttoffset.py ├── ttprint.py ├── ttrange.py ├── ttsum.py ├── ttsync.py ├── ttsyslog.py └── use_memory.c /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | \#*# 3 | *.pyc 4 | *.o 5 | *.hi 6 | *.dump 7 | *.log 8 | *.rej 9 | *.orig 10 | *.patch 11 | *.diff 12 | .tags* 13 | .deps 14 | *.pdf 15 | *.tt 16 | *.out 17 | 18 | # Ignore IDE files 19 | /.idea/ 20 | /nbproject/ 21 | 22 | reports/ 23 | traces/ 24 | bytedance/ 25 | saved_traces/ -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile to build Homa as a Linux module. 
2 | 3 | HOMA_OBJS := homa_grant.o \ 4 | homa_incoming.o \ 5 | homa_metrics.o \ 6 | homa_offload.o \ 7 | homa_outgoing.o \ 8 | homa_peer.o \ 9 | homa_pool.o \ 10 | homa_plumbing.o \ 11 | homa_rpc.o \ 12 | homa_skb.o \ 13 | homa_sock.o \ 14 | homa_timer.o \ 15 | homa_utils.o \ 16 | timetrace.o 17 | 18 | ifneq ($(KERNELRELEASE),) 19 | 20 | obj-m += homa.o 21 | homa-y = $(HOMA_OBJS) 22 | 23 | MY_CFLAGS += -g 24 | ccflags-y += ${MY_CFLAGS} 25 | CC += ${MY_CFLAGS} 26 | 27 | else 28 | 29 | ifneq ($(KERNEL_SRC),) 30 | # alternatively to variable KDIR accept variable KERNEL_SRC as used in 31 | # PetaLinux/Yocto for example 32 | KDIR ?= $(KERNEL_SRC) 33 | endif 34 | 35 | KDIR ?= /lib/modules/$(shell uname -r)/build 36 | 37 | all: 38 | $(MAKE) -C $(KDIR) M=$(shell pwd) modules 39 | 40 | install: 41 | $(MAKE) -C $(KDIR) M=$(shell pwd) modules_install 42 | 43 | check: 44 | ../homaLinux/scripts/kernel-doc -none *.c 45 | 46 | # Copy stripped source files to a Linux source tree 47 | LINUX_SRC_DIR ?= ../net-next 48 | HOMA_TARGET ?= $(LINUX_SRC_DIR)/net/homa 49 | CP_HDRS := homa_impl.h \ 50 | homa_peer.h \ 51 | homa_pool.h \ 52 | homa_rpc.h \ 53 | homa_sock.h \ 54 | homa_stub.h \ 55 | homa_wire.h 56 | CP_SRCS := $(patsubst %.o,%.c,$(filter-out timetrace.o, $(HOMA_OBJS))) 57 | CP_TARGETS := $(patsubst %,$(HOMA_TARGET)/%,$(CP_HDRS) $(CP_SRCS)) 58 | net-next: $(CP_TARGETS) $(LINUX_SRC_DIR)/include/uapi/linux/homa.h 59 | $(HOMA_TARGET)/%: % util/strip.py 60 | util/strip.py $< > $@ 61 | $(LINUX_SRC_DIR)/include/uapi/linux/homa.h: homa.h util/strip.py 62 | util/strip.py $< > $@ 63 | 64 | clean: 65 | $(MAKE) -C $(KDIR) M=$(shell pwd) clean 66 | 67 | # The following targets are useful for debugging Makefiles; they 68 | # print the value of a make variable in one of several contexts. 
69 | print-%: 70 | @echo $* = $($*) 71 | 72 | printBuild-%: 73 | $(MAKE) -C $(KDIR) M=$(shell pwd) $@ 74 | 75 | printClean-%: 76 | $(MAKE) -C $(KDIR) M=$(shell pwd) $@ 77 | 78 | endif 79 | -------------------------------------------------------------------------------- /balance.txt: -------------------------------------------------------------------------------- 1 | This file discusses the issue of load-balancing in Homa. 2 | 3 | In order to keep up with fast networks, transport protocols must distribute 4 | their processing across multiple cores. For outgoing packets this happens 5 | naturally: sending threads run on different cores and packet processing 6 | for outbound packets happens on the same core as the sending thread. Things 7 | are more difficult for incoming packets. In general, an incoming packet 8 | will pass through 3 cores: 9 | * NAPI/GRO: the NIC distributes incoming packets across cores using RSS. 10 | The number of incoming channels, and their association with cores, can 11 | be configured in software. The NIC will then distribute packets across 12 | those channels using a hash based on packet header fields. The device 13 | driver receives packets as part of NAPI, then packets are collected into 14 | batches using GRO and handed off to SoftIRQ. 15 | * SoftIRQ processing occurs on a (potentially) different core from NAPI/GRO; 16 | the network stack runs here, including Homa's main handlers for incoming 17 | packets. The system default is to compute another hash function on packet 18 | headers to select a SoftIRQ core for a batch, but it is possible for GRO 19 | to make its own choice of core, and Homa does this. 20 | * Once a complete message is received, it is handed off to an application 21 | thread, which typically runs on a different core. 22 | 23 | The load balancing challenge is to distribute load across multiple cores 24 | without overloading any individual core ("hotspots"). 
This has proven 25 | quite difficult, and hotspots are the primary source of tail latency in Homa. 26 | The most common cause of hotspots is when 2 or more of the above tasks 27 | are assigned to the same core. For example: 28 | * Two batches from different NAPI/GRO cores might get assigned to the same 29 | SoftIRQ core. 30 | * A particular core might be very busy handling NAPI/GRO for a stream of 31 | packets in a large message; this will prevent application threads from 32 | making progress on that core. A short message might pass through other 33 | cores for NAPI/GRO and SoftIRQ, but if its application is running on 34 | the busy core, then it will not be able to process the short message. 35 | 36 | Part of the problem is that core assignments are made independently by 37 | 3 different schedulers (RSS for the NAPI/GRO core, GRO or the system for 38 | the SoftIRQ core, and the Linux scheduler for the application core), 39 | so conflicts are likely to occur. Only one of these schedulers is under 40 | control of the transport protocol. 41 | 42 | It's also important to note that using more cores isn't always the best 43 | approach. For example, if a node is lightly loaded, it would be best to 44 | do all RX processing on a single core: using multiple cores causes extra 45 | cache misses as data migrates from core to core, and it also adds latency 46 | to pass control between cores. In an ideal world, the number of cores used for 47 | protocol processing would be just enough to keep any of them from getting 48 | overloaded. However, it appears to be hard to vary the number of cores 49 | without risking overloads; except in a few special cases, Homa doesn't do 50 | this. 51 | 52 | Homa tries to use its control over SoftIRQ scheduling to minimize hotspots. 53 | Several different approaches have been tried over time; this document 54 | focuses on the two most recent ones, which are called "Gen2" and "Gen3". 
55 | 56 | Gen2 Load Balancing 57 | ------------------- 58 | * Gen2 assumes that NAPI/GRO processing is occurring on all cores. 59 | * When GRO chooses where to assign a batch of packets for SoftIRQ, it 60 | considers the next several cores (in ascending circular core order 61 | after the GRO core). 62 | * GRO uses several criteria to try to find a "good" core for SoftIRQ, such 63 | as avoiding a core that has done recent GRO processing, or one for which 64 | there is already pending SoftIRQ work. 65 | * Selection stops as soon as it finds a "good" core. 66 | * If no "good" core is found, then GRO will rotate among the successor 67 | cores on a batch-by-batch basis. 68 | * In some cases, Gen2 will bypass the SoftIRQ handoff mechanism and simply 69 | run SoftIRQ immediately on its core. This is done in two cases: short 70 | packets and grant packets. Bypass is particularly useful for grants 71 | because it eliminates the latency associated with a handoff, and grant 72 | turnaround time is important for overall performance. 73 | 74 | Gen2 has several problems: 75 | * It doesn't do anything about the problem of application threads conflicting 76 | with NAPI/GRO or SoftIRQ. 77 | * A single core may be assigned both SoftIRQ and NAPI/GRO work at the 78 | same time. 79 | * The SoftIRQ core groups for different NAPI/GRO cores overlap, so it's 80 | possible for multiple GROs to schedule batches to the same SoftIRQ core. 81 | * When receiving packets from a large message, Gen2 tends to alternate between 82 | 2 or more SoftIRQ cores, which results in unnecessary cache coherency 83 | traffic. 84 | * If the NAPI/GRO core is overloaded, bypass can make things worse (especially 85 | since grant processing results in transmitting additional packets, which 86 | is fairly expensive). 87 | 88 | Gen3 Load Balancing 89 | ------------------- 90 | The Gen3 load-balancing mechanism is an attempt to solve the problems 91 | associated with Gen2. 
92 | * The number of channels is reduced, so that only 1/4 of the cores do 93 | NAPI/GRO processing. This appears to be sufficient capacity to avoid 94 | overloads on any of the NAPI/GRO cores. 95 | * Each NAPI/GRO core has 3 other cores (statically assigned) that it can use 96 | for SoftIRQ processing. The SoftIRQ core groups for different NAPI/GRO 97 | cores do not overlap. This means that SoftIRQ and GRO will never happen 98 | simultaneously on the same core, and there will be no conflicts between 99 | the SoftIRQ groups of different NAPI/GRO cores. 100 | * Gen3 takes steps to avoid core conflicts between application threads and 101 | NAPI/GRO and SoftIRQ processing, as described below. 102 | * When an application thread is using Homa actively on a core, the core 103 | is marked as "busy". When GRO selects a SoftIRQ core, it attempts to 104 | avoid cores that are busy with application threads. If there is a choice 105 | of un-busy cores, GRO will try to reuse a single SoftIRQ core over and over. 106 | * Homa also keeps track of recent NAPI/GRO and SoftIRQ processing on each 107 | core. When an incoming message becomes ready and there are multiple threads 108 | waiting for messages, Homa tries to pick a thread whose core has not had 109 | recent Homa activity. 110 | * Between these two mechanisms, the hope is that SoftIRQ and application 111 | work will adjust their core assignments to avoid conflicts. 112 | 113 | Gen3 was implemented in November of 2023; so far its performance appears to be 114 | about the same as Gen2 (slightly worse for W2 and W3, slightly better for W5). 115 | Gen3 performance on W3 appears highly variable: P99 latency can vary by 5-10x 116 | from run to run; as of December 2023 the reasons for this have not been 117 | determined. 
-------------------------------------------------------------------------------- /cloudlab/README.md: -------------------------------------------------------------------------------- 1 | This directory contains scripts and other supporting files for testing 2 | Homa on the CloudLab cluster. Miscellaneous notes: 3 | 4 | - Everything in the bin directory will be copied to ~/bin on CloudLab. 5 | -------------------------------------------------------------------------------- /cloudlab/bash_profile: -------------------------------------------------------------------------------- 1 | export LANG=C 2 | export VISUAL=vi 3 | 4 | # The following lines arrange for the current git branch to 5 | # appear in the shell prompt. 6 | 7 | parse_git_branch() { 8 | ref=$(git symbolic-ref HEAD -q 2>/dev/null) 9 | st=$? 10 | if [ $st -eq 1 ]; then 11 | echo "[detached]" 12 | elif [ $st -eq 0 ]; then 13 | echo " [${ref#refs/heads/}]" 14 | fi 15 | } 16 | 17 | PS1="${debian_chroot:+($debian_chroot)}\u@\h:\w\$(parse_git_branch)\$ " 18 | XTERM_TITLE="\w$(parse_git_branch)" 19 | PS1="\n\[\e]0;$XTERM_TITLE\a\]\[\e[1;32m\]$PS1\[\e[1;37m\]" 20 | 21 | . ~/.bashrc 22 | -------------------------------------------------------------------------------- /cloudlab/bashrc: -------------------------------------------------------------------------------- 1 | # ~/.bashrc: executed by bash(1) for non-login shells. 
2 | # see /usr/share/doc/bash/examples/startup-files (in the package bash-doc) 3 | # for examples 4 | 5 | PATH=/opt/gradle-7.3/bin:/$USER/install/bin:~/homaModule/util:~/homaModule/perf:~/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin 6 | 7 | export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib/x86_64-linux-gnu 8 | 9 | export PYTHONPATH=/users/$USER/homaModule/util:/users/$USER/bin 10 | 11 | export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 12 | 13 | # If not running interactively, don't do anything 14 | [ -z "$PS1" ] && return 15 | 16 | # don't put duplicate lines in the history. See bash(1) for more options 17 | # don't overwrite GNU Midnight Commander's setting of `ignorespace'. 18 | export HISTCONTROL=$HISTCONTROL${HISTCONTROL+,}ignoredups 19 | # ... or force ignoredups and ignorespace 20 | export HISTCONTROL=ignoreboth 21 | 22 | # append to the history file, don't overwrite it 23 | shopt -s histappend 24 | 25 | # for setting history length see HISTSIZE and HISTFILESIZE in bash(1) 26 | 27 | # check the window size after each command and, if necessary, 28 | # update the values of LINES and COLUMNS. 
29 | shopt -s checkwinsize 30 | 31 | # make less more friendly for non-text input files, see lesspipe(1) 32 | [ -x /usr/bin/lesspipe ] && eval "$(SHELL=/bin/sh lesspipe)" 33 | 34 | # set variable identifying the chroot you work in (used in the prompt below) 35 | if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then 36 | debian_chroot=$(cat /etc/debian_chroot) 37 | fi 38 | 39 | # set a fancy prompt (non-color, unless we know we "want" color) 40 | case "$TERM" in 41 | xterm-color) color_prompt=yes;; 42 | esac 43 | 44 | # uncomment for a colored prompt, if the terminal has the capability; turned 45 | # off by default to not distract the user: the focus in a terminal window 46 | # should be on the output of commands, not on the prompt 47 | force_color_prompt=yes 48 | 49 | if [ -n "$force_color_prompt" ]; then 50 | if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then 51 | # We have color support; assume it's compliant with Ecma-48 52 | # (ISO/IEC-6429). (Lack of such support is extremely rare, and such 53 | # a case would tend to support setf rather than setaf.) 54 | color_prompt=yes 55 | else 56 | color_prompt= 57 | fi 58 | fi 59 | 60 | # If this is an xterm set the title to user@host:dir 61 | case "$TERM" in 62 | xterm*|rxvt*) 63 | PS1="\[\e]0;${debian_chroot:+($debian_chroot)}\u@\h: \w\a\]$PS1" 64 | ;; 65 | *) 66 | ;; 67 | esac 68 | 69 | # Alias definitions. 70 | # You may want to put all your additions into a separate file like 71 | # ~/.bash_aliases, instead of adding them here directly. 72 | # See /usr/share/doc/bash-doc/examples in the bash-doc package. 73 | 74 | #if [ -f ~/.bash_aliases ]; then 75 | # . 
~/.bash_aliases 76 | #fi 77 | 78 | # enable color support of ls and also add handy aliases 79 | if [ -x /usr/bin/dircolors ]; then 80 | eval "`dircolors -b`" 81 | alias ls='ls --color=auto' 82 | alias dir='dir --color=auto' 83 | alias vdir='vdir --color=auto' 84 | 85 | alias grep='grep --color=auto' 86 | alias fgrep='fgrep --color=auto' 87 | alias egrep='egrep --color=auto' 88 | fi 89 | 90 | # some more ls aliases 91 | alias ll='ls -l' 92 | alias la='ls -A' 93 | #alias l='ls -CF' 94 | alias jdebug='java -Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=9000' 95 | 96 | # enable programmable completion features (you don't need to enable 97 | # this, if it's already enabled in /etc/bash.bashrc and /etc/profile 98 | # sources /etc/bash.bashrc). 99 | if [ -f /etc/bash_completion ]; then 100 | . /etc/bash_completion 101 | fi 102 | 103 | alias gitmods="git status --short | awk '{print(\$2);}'" 104 | 105 | alias makemore='make 2>&1 | more' 106 | 107 | id () 108 | { 109 | ttgrep.py "id $1" node.tt 110 | } 111 | 112 | cpid() 113 | { 114 | ttgrep.py "id $1" cp.tt 115 | } 116 | 117 | core () 118 | { 119 | ttgrep.py "[C$1]" node.tt > core.tt 120 | less +G core.tt 121 | } 122 | 123 | thread () 124 | { 125 | ttgrep.py "[$1]" cp.tt > thread.tt 126 | less +G thread.tt 127 | } 128 | 129 | cid () 130 | { 131 | ttgrep.py "cid $1" cp.tt > cid.tt 132 | less +G cid.tt 133 | } 134 | 135 | pid () 136 | { 137 | ttgrep.py "pid $1" $2 > pid.tt 138 | less +G pid.tt 139 | } 140 | 141 | export GRPC_VERBOSITY=INFO -------------------------------------------------------------------------------- /cloudlab/bin/ckill: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2020-2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # Kill processes with a given name on a cluster of machines. 
7 | # 8 | # Usage: 9 | # ckill name num_nodes [first] 10 | # 11 | # "name" gives the name of the process to be killed (suitable as an 12 | # argument to "pidof"). The "num_nodes" argument indicates how many 13 | # servers should be examined for processes to kill. The "first" argument 14 | # is optional; it is an integer identifying the first node on which 15 | # killing will occur (e.g. "ckill cp_node 4 2" means node2 through node5 will be 16 | # searched). "first" defaults to 0. 17 | 18 | root=~/homaModule 19 | 20 | if [ $# -eq 3 ]; then 21 | first=$3 22 | elif [ $# -eq 2 ]; then 23 | first=0 24 | else 25 | echo "Usage: ckill name num_nodes [first]" 26 | exit 1 27 | fi 28 | last=`expr $first + $2 - 1` 29 | 30 | for ((i = $first ; i <= $last; i++)); do 31 | node=node$i 32 | echo "ssh node$i 'kill \`pidof $1\`'" 33 | ssh node$i "kill \`pidof $1\`" 34 | done -------------------------------------------------------------------------------- /cloudlab/bin/install_homa: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2020-2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This script installs all of the files needed to run Homa tests on one 7 | # or more target machines; it also loads the Homa kernel module. 8 | # 9 | # Usage: 10 | # install_homa [--net-next] num_nodes [first] 11 | # 12 | # The "num_nodes" argument indicates how many servers should be updated. 13 | # The "first" argument is optional; it is an integer identifying the 14 | # first node on which installation will occur (e.g. "install_homa 4 2" means 15 | # node2 through node5 will be updated). "first" defaults to 0. 16 | # This script assumes that the Homa module binary (homa.ko) has already 17 | # been built. If --net-next is specified, it will be in the kernel build 18 | # directory (see code below for path), otherwise it will be in ~/homaModule. 
19 | # In addition, the utility programs in ~/homaModule/util must have been built. 20 | 21 | root=~/homaModule 22 | 23 | set -e 24 | 25 | homa_ko=$root/homa.ko 26 | if [ $1 = "--net-next" ]; then 27 | homa_ko=/netnext/net-next/net/homa/homa.ko 28 | shift 29 | fi 30 | if [ $# -eq 2 ]; then 31 | first=$2 32 | elif [ $# -eq 1 ]; then 33 | first=0 34 | else 35 | echo "Usage: install_homa [--net-next] num_nodes [first]" 36 | exit 1 37 | fi 38 | last=`expr $first + $1 - 1` || true 39 | 40 | for ((i = $first ; i <= $last; i++)); do 41 | node=node$i 42 | echo 43 | echo '*** Installing Homa on' $node '***' 44 | rsync --ipv4 -e "ssh -4 -o StrictHostKeyChecking=no" -rtv ~/.bashrc ~/.bash_profile ~/.gdbinit $node: 45 | rsync --ipv4 -e "ssh -4 -o StrictHostKeyChecking=no" -rtv --exclude __pycache__ ~/bin/ $node:bin/ 46 | rsync --ipv4 -e "ssh -4 -o StrictHostKeyChecking=no" -rtv $homa_ko $root/util/cp_node $root/util/homa_prio $root/util/*.py $node:bin/ 47 | ssh -4 $node 'sudo sysctl .kernel.printk="5 4 1 7"' 48 | ssh -4 $node 'echo $PATH' 49 | ssh -4 $node 'config default' 50 | done -------------------------------------------------------------------------------- /cloudlab/bin/on_nodes: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2020-2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This uses ssh to run a given command on one or more nodes in the 7 | # cluster. 8 | # 9 | # Usage: 10 | # on_nodes num_first last cmd arg arg ... 11 | # 12 | # The "first" and "last" arguments give the range of nodes (inclusive) on 13 | # which the command should run. The remaining arguments are a command 14 | # and its arguments to run on the given machines 15 | 16 | if [ $# -lt 3 ]; then 17 | echo "Usage: on_nodes first last cmd arg arg ..." 
18 | exit 1 19 | fi 20 | first=$1 21 | last=$2 22 | shift 2 23 | 24 | for ((i = $first ; i <= $last; i++)); do 25 | node=node$i 26 | echo "" 27 | echo $node: 28 | ssh $node $@ 29 | done -------------------------------------------------------------------------------- /cloudlab/bin/set_cutoffs: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2020-2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This script sets the cutoffs for unscheduled priorities on one or more 7 | # nodes to match the characteristics of the Homa workloads. 8 | # 9 | # Usage: 10 | # set_cutoffs workload num_nodes [first] 11 | # 12 | # The "workload" argument must be one of w1-w5; the cutoffs will be set 13 | # to values appropriate for that workload. The "num_nodes" arguments indicates 14 | # how many servers should be updated. The "first" argument is optional; it 15 | # is an integer identifying the first node on which installation will occur 16 | # (e.g. "set_cutoffs w3 4 2" means node2 through node5 will be updated. 17 | # "first" defaults to 0. 
18 | 19 | root=~/homaModule 20 | 21 | if [ $# -eq 3 ]; then 22 | first=$3 23 | elif [ $# -eq 2 ]; then 24 | first=0 25 | else 26 | echo "Usage: set_cutoffs workload num_nodes [first]" 27 | exit 1 28 | fi 29 | last=`expr $first + $2 - 1` 30 | 31 | if [ $1 = w1 ]; then 32 | cutoffs="1000000 12288 2112 1280 832 576 384 192" 33 | elif [ $1 = w2 ]; then 34 | cutoffs="1000000 1000000 1000000 7168 1920 640 448 320" 35 | elif [ $1 = w3 ]; then 36 | cutoffs="1000000 1000000 1000000 1000000 1000000 63488 12288 3008" 37 | elif [ $1 = w4 ]; then 38 | cutoffs="1000000 1000000 1000000 1000000 1000000 1000000 1000000 68608" 39 | elif [ $1 = w5 ]; then 40 | cutoffs="1000000 1000000 1000000 1000000 1000000 1000000 1000000 1000000" 41 | else 42 | echo "Unknown workload '$1'; must be w1-w5" 43 | exit 1 44 | fi 45 | 46 | for ((i = $first ; i <= $last; i++)); do 47 | node=node$i 48 | echo '*** Updating cutoffs on' $node '***' 49 | ssh $node sudo sysctl /net/homa/unsched_cutoffs=\"$cutoffs\" 50 | done -------------------------------------------------------------------------------- /cloudlab/bin/update_linux: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2020-2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This script uses files on the current machine to update the kernel on one 7 | # or more other machines and reboot those machines. 8 | # 9 | # Usage: 10 | # update_linux num_nodes [first] 11 | # 12 | # The "num_nodes" argument indicates how many nodes the command should 13 | # be run on (starting at node1). The "first" argument is optional; it is 14 | # an integer identifying the first node on which installation will occur 15 | # (e.g. "update_linux 4 2" means node2 through node5 will be updated). 16 | # "first" defaults to 1. 
17 | 18 | v=`uname -r` 19 | #v=5.17.7+ 20 | 21 | if [ $# -eq 2 ]; then 22 | first=$2 23 | elif [ $# -eq 1 ]; then 24 | first=1 25 | else 26 | echo "Usage: update_linux num_nodes [first]" 27 | exit 1 28 | fi 29 | last=`expr $first + $1 - 1` 30 | 31 | for ((i = $first ; i <= $last; i++)); do 32 | node=node$i 33 | echo 34 | echo $node 35 | ssh $node 'rm -rf tmp; mkdir -p tmp tmp/boot' 36 | rsync -rtv /boot/initrd.img-$v /boot/config-$v /boot/System.map-$v \ 37 | /boot/vmlinuz-$v $node:tmp/boot/ 38 | ssh $node "sudo cp -f tmp/boot/* /boot; sudo reboot" 39 | done 40 | -------------------------------------------------------------------------------- /cloudlab/config_switch: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2020-2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This program outputs the commands needed to configure a CloudLab switch 7 | # for Homa, or to reset it. 8 | # Usage: 9 | # config_switch [reset] 10 | # 11 | # Commands will be printed on stdout. 12 | 13 | import sys 14 | 15 | # Ports to configure 16 | nodes = [81, 82, 83, 84, 85, 87, 88, 89, 91, 93, 94, 95, 96, 98, 100, 102, 17 | 104, 105, 106, 107, 108, 110, 111, 112, 114, 116, 117, 118, 120] 18 | ports = [] 19 | prev_switch = -1 20 | for node in nodes: 21 | switch = (node-1)//40 22 | if (switch != prev_switch) and (prev_switch >= 0): 23 | print("Multiple switches: rack-%d and rack-%d" % (switch, prev_switch), 24 | file=sys.stderr) 25 | prev_switch = switch 26 | ports.append(node - 40*switch) 27 | ports.sort() 28 | # ports = range(1, 41) 29 | 30 | def config(): 31 | # First, must enter "configure" mode 32 | print("enable") 33 | print("configure terminal") 34 | 35 | for port in ports: 36 | # Configure priorities for Homa. 
37 | print("interface ethernet 1/%d qos trust both" % (port)) 38 | for tc in range(8): 39 | print("interface ethernet 1/%d traffic-class %d dcb ets strict" % 40 | (port, tc)) 41 | 42 | # Enable large packets 43 | print("interface ethernet 1/%d mtu 9216 force" % (port)) 44 | 45 | # Set DCTCP marking thresholds: 46 | print("interface ethernet 1/%d traffic-class 0 congestion-control ecn " 47 | "minimum-absolute 70 maximum-absolute 70" % (port)) 48 | print("interface ethernet 1/%d traffic-class 1 congestion-control ecn " 49 | "minimum-absolute 70 maximum-absolute 70" % (port)) 50 | 51 | def reset(): 52 | # First, must enter "configure" mode 53 | print("enable") 54 | print("configure terminal") 55 | 56 | for port in ports: 57 | # Restore QOS priorities. 58 | print("interface ethernet 1/%d no qos trust" % (port)) 59 | for tc in range(8): 60 | print("interface ethernet 1/%d traffic-class %d no dcb ets" % 61 | (port, tc)) 62 | 63 | # Disable large packets 64 | print("interface ethernet 1/%d mtu 1500 force" % (port)) 65 | 66 | # Reset DCTCP marking thresholds: 67 | print("interface ethernet 1/%d no traffic-class 0 congestion-control" 68 | % (port)) 69 | print("interface ethernet 1/%d no traffic-class 1 congestion-control" 70 | % (port)) 71 | 72 | if len(sys.argv) > 1: 73 | if sys.argv[1] == "reset": 74 | reset() 75 | else: 76 | print("Usage: config_switch [reset]", file=sys.stderr) 77 | else: 78 | config() -------------------------------------------------------------------------------- /cloudlab/gdbinit: -------------------------------------------------------------------------------- 1 | set style address foreground green 2 | -------------------------------------------------------------------------------- /cloudlab/update: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (c) 2019-2020 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This script copies modified information from this 
directory to the 7 | # CloudLab machines given by the arguments (defaults are provided if no 8 | # arguments). 9 | 10 | if [ $# -eq 0 ]; then 11 | targets=`cat $HOME/.cloudlabNode` 12 | else 13 | targets=$* 14 | fi 15 | 16 | for t in $targets; do 17 | echo $t 18 | rsync -rtv --exclude-from=rsync-exclude.txt ./ ouster@$t:homaModule/ 19 | done 20 | for t in $targets; do 21 | echo $t 22 | rsync -rtv cloudlab/bin/ ouster@$t:bin/ 23 | rsync -rtv cloudlab/bash_profile ouster@$t:.bash_profile 24 | rsync -rtv cloudlab/bashrc ouster@$t:.bashrc 25 | rsync -rtv cloudlab/gdbinit ouster@$t:.gdbinit 26 | rsync -rtv ~/.ssh/cloudlab_rsa.pub ~/.ssh/cloudlab_rsa $t:.ssh/ 27 | ssh $t 'if [ ! -e .ssh/id_rsa ]; then 28 | cp .ssh/cloudlab_rsa .ssh/id_rsa 29 | fi' 30 | done 31 | -------------------------------------------------------------------------------- /dissector/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeLists.txt 2 | # 3 | # Copyright 2023 Missing Link Electronics Inc, 4 | # Björn Petersen 5 | # 6 | # This code is dual licensed under one of the following 2 licenses: 7 | # 8 | # ################ 9 | # # GPL2 License # 10 | # ################ 11 | # 12 | # This program is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU General Public License 14 | # as published by the Free Software Foundation; either version 2 15 | # of the License, or (at your option) any later version. 16 | # 17 | # This program is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # GNU General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU General Public License 23 | # along with this program; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
25 | # 26 | # 27 | # ################ 28 | # # HOMA License # 29 | # ################ 30 | # 31 | # Permission to use, copy, modify, and/or distribute this software for any 32 | # purpose with or without fee is hereby granted, provided that the above 33 | # copyright notice and this permission notice appear in all copies. 34 | # 35 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 36 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 37 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 38 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 39 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 40 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 41 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 42 | 43 | cmake_minimum_required(VERSION 3.1) 44 | 45 | project(HomaDissector VERSION 0.0.1 DESCRIPTION "Wireshark Homa Plugin" LANGUAGES C) 46 | option(INSTALL_PLUGIN_LOCAL "Install the homa dissector plugin inside the local folder of wireshark" ON) 47 | 48 | find_package(Wireshark CONFIG REQUIRED) 49 | 50 | if (NOT Wireshark_PLUGINS_ENABLED) 51 | message(FATAL_ERROR "Wireshark was compiled without support for plugins") 52 | endif () 53 | 54 | set(CMAKE_C_VISIBILITY_PRESET hidden) 55 | if (CMAKE_COMPILER_IS_GNUCC) 56 | set(CMAKE_C_FLAGS "-Wall -Wextra ${CMAKE_C_FLAGS}") 57 | endif () 58 | 59 | add_definitions(-DVERSION=\"${PROJECT_VERSION}\") 60 | 61 | add_library(${PROJECT_NAME} MODULE homa.c) 62 | set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" DEFINE_SYMBOL "") 63 | target_link_libraries(${PROJECT_NAME} epan) 64 | 65 | if (${INSTALL_PLUGIN_LOCAL}) 66 | install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION "$ENV{HOME}/.local/lib/wireshark/plugins/${Wireshark_MAJOR_VERSION}.${Wireshark_MINOR_VERSION}/epan" NAMELINK_SKIP) 67 | else () 68 | install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION 
"${Wireshark_PLUGIN_INSTALL_DIR}/epan" NAMELINK_SKIP) 69 | endif () 70 | -------------------------------------------------------------------------------- /dissector/README.md: -------------------------------------------------------------------------------- 1 | # HOMA Dissector 2 | 3 | A dissector for viewing [homa](https://homa-transport.atlassian.net/wiki/spaces/HOMA/overview) packets. The dissector 4 | was tested with Ubuntu 18.04 and Ubuntu 22.04 with the Wireshark version 3.6.2 5 | 6 | ## Prerequisites 7 | 8 | The dissector is a cmake based wireshark plugin. For building please make sure that the required wireshark dependencies, 9 | including wireshark headers, are installed. For Debian based systems the following command line may be 10 | used: `apt install wireshark-dev wireshark-common` 11 | 12 | ## Installation 13 | 14 | The Plugin can be installed with the following steps. 15 | 16 | ```shell 17 | cmake . 18 | make 19 | make install 20 | ``` 21 | 22 | Per default, the plugin will be installed inside the local plugin folder of wireshark. For installing the plugin global 23 | on your system, run the following command: 24 | 25 | ```shell 26 | cmake -DINSTALL_PLUGIN_LOCAL=OFF . 27 | make 28 | sudo make install 29 | ``` 30 | -------------------------------------------------------------------------------- /homa.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | 3 | /* This file defines the kernel call interface for the Homa 4 | * transport protocol. 5 | */ 6 | 7 | #ifndef _UAPI_LINUX_HOMA_H 8 | #define _UAPI_LINUX_HOMA_H 9 | 10 | #include 11 | #ifndef __KERNEL__ 12 | #include 13 | #include 14 | #endif 15 | 16 | /* IANA-assigned Internet Protocol number for Homa. */ 17 | #define IPPROTO_HOMA 146 18 | 19 | /** 20 | * define HOMA_MAX_MESSAGE_LENGTH - Maximum bytes of payload in a Homa 21 | * request or response message. 
22 | */ 23 | #define HOMA_MAX_MESSAGE_LENGTH 1000000 24 | 25 | /** 26 | * define HOMA_BPAGE_SIZE - Number of bytes in pages used for receive 27 | * buffers. Must be power of two. 28 | */ 29 | #define HOMA_BPAGE_SIZE (1 << HOMA_BPAGE_SHIFT) 30 | #define HOMA_BPAGE_SHIFT 16 31 | 32 | /** 33 | * define HOMA_MAX_BPAGES - The largest number of bpages that will be required 34 | * to store an incoming message. 35 | */ 36 | #define HOMA_MAX_BPAGES ((HOMA_MAX_MESSAGE_LENGTH + HOMA_BPAGE_SIZE - 1) \ 37 | >> HOMA_BPAGE_SHIFT) 38 | 39 | /** 40 | * define HOMA_MIN_DEFAULT_PORT - The 16 bit port space is divided into 41 | * two nonoverlapping regions. Ports 1-32767 are reserved exclusively 42 | * for well-defined server ports. The remaining ports are used for client 43 | * ports; these are allocated automatically by Homa. Port 0 is reserved. 44 | */ 45 | #define HOMA_MIN_DEFAULT_PORT 0x8000 46 | 47 | /** 48 | * struct homa_sendmsg_args - Provides information needed by Homa's 49 | * sendmsg; passed to sendmsg using the msg_control field. 50 | */ 51 | struct homa_sendmsg_args { 52 | /** 53 | * @id: (in/out) An initial value of 0 means a new request is 54 | * being sent; nonzero means the message is a reply to the given 55 | * id. If the message is a request, then the value is modified to 56 | * hold the id of the new RPC. 57 | */ 58 | __u64 id; 59 | 60 | /** 61 | * @completion_cookie: (in) Used only for request messages; will be 62 | * returned by recvmsg when the RPC completes. Typically used to 63 | * locate app-specific info about the RPC. 64 | */ 65 | __u64 completion_cookie; 66 | }; 67 | 68 | #if !defined(__cplusplus) 69 | _Static_assert(sizeof(struct homa_sendmsg_args) >= 16, 70 | "homa_sendmsg_args shrunk"); 71 | _Static_assert(sizeof(struct homa_sendmsg_args) <= 16, 72 | "homa_sendmsg_args grew"); 73 | #endif 74 | 75 | /** 76 | * struct homa_recvmsg_args - Provides information needed by Homa's 77 | * recvmsg; passed to recvmsg using the msg_control field. 
78 | */ 79 | struct homa_recvmsg_args { 80 | /** 81 | * @id: (in/out) Initially specifies the id of the desired RPC, or 0 82 | * if any RPC is OK; returns the actual id received. 83 | */ 84 | __u64 id; 85 | 86 | /** 87 | * @completion_cookie: (out) If the incoming message is a response, 88 | * this will return the completion cookie specified when the 89 | * request was sent. For requests this will always be zero. 90 | */ 91 | __u64 completion_cookie; 92 | 93 | /** 94 | * @flags: (in) OR-ed combination of bits that control the operation. 95 | * See below for values. 96 | */ 97 | __u32 flags; 98 | 99 | /** 100 | * @num_bpages: (in/out) Number of valid entries in @bpage_offsets. 101 | * Passes in bpages from previous messages that can now be 102 | * recycled; returns bpages from the new message. 103 | */ 104 | __u32 num_bpages; 105 | 106 | /** 107 | * @bpage_offsets: (in/out) Each entry is an offset into the buffer 108 | * region for the socket pool. When returned from recvmsg, the 109 | * offsets indicate where fragments of the new message are stored. All 110 | * entries but the last refer to full buffer pages (HOMA_BPAGE_SIZE 111 | * bytes) and are bpage-aligned. The last entry may refer to a bpage 112 | * fragment and is not necessarily aligned. The application now owns 113 | * these bpages and must eventually return them to Homa, using 114 | * bpage_offsets in a future recvmsg invocation. 
115 | */ 116 | __u32 bpage_offsets[HOMA_MAX_BPAGES]; 117 | }; 118 | 119 | #if !defined(__cplusplus) 120 | _Static_assert(sizeof(struct homa_recvmsg_args) >= 88, 121 | "homa_recvmsg_args shrunk"); 122 | _Static_assert(sizeof(struct homa_recvmsg_args) <= 88, 123 | "homa_recvmsg_args grew"); 124 | #endif 125 | 126 | /* Flag bits for homa_recvmsg_args.flags (see man page for documentation): 127 | */ 128 | #define HOMA_RECVMSG_REQUEST 0x01 129 | #define HOMA_RECVMSG_RESPONSE 0x02 130 | #define HOMA_RECVMSG_NONBLOCKING 0x04 131 | #define HOMA_RECVMSG_VALID_FLAGS 0x07 132 | 133 | /** 134 | * struct homa_abort_args - Structure that passes arguments and results 135 | * between user space and the HOMAIOCABORT ioctl. 136 | */ 137 | struct homa_abort_args { 138 | /** @id: Id of RPC to abort, or zero to abort all RPCs on socket. */ 139 | __u64 id; 140 | 141 | /** 142 | * @error: Zero means destroy and free RPCs; nonzero means complete 143 | * them with this error (recvmsg will return the RPCs). 144 | */ 145 | int error; 146 | 147 | int _pad1; 148 | __u64 _pad2[2]; 149 | }; 150 | 151 | #if !defined(__cplusplus) 152 | _Static_assert(sizeof(struct homa_abort_args) >= 32, "homa_abort_args shrunk"); 153 | _Static_assert(sizeof(struct homa_abort_args) <= 32, "homa_abort_args grew"); 154 | #endif 155 | 156 | /** define SO_HOMA_RCVBUF: setsockopt option for specifying buffer region. */ 157 | #define SO_HOMA_RCVBUF 10 158 | 159 | /** struct homa_rcvbuf_args - setsockopt argument for SO_HOMA_RCVBUF. */ 160 | struct homa_rcvbuf_args { 161 | /** @start: Address of first byte of buffer region in user space. */ 162 | __u64 start; 163 | 164 | /** @length: Total number of bytes available at @start. */ 165 | size_t length; 166 | }; 167 | 168 | /* Meanings of the bits in Homa's flag word, which can be set using 169 | * "sysctl /net/homa/flags". 170 | */ 171 | 172 | /** 173 | * define HOMA_FLAG_DONT_THROTTLE - disable the output throttling mechanism 174 | * (always send all packets immediately). 
175 | */ 176 | #define HOMA_FLAG_DONT_THROTTLE 2 177 | 178 | /** 179 | * I/O control calls on Homa sockets. These are mapped into the 180 | * SIOCPROTOPRIVATE range of 0x89e0 through 0x89ef. 181 | */ 182 | 183 | #define HOMAIOCABORT _IOWR(0x89, 0xe3, struct homa_abort_args) 184 | #define HOMAIOCFREEZE _IO(0x89, 0xef) 185 | 186 | #ifndef __STRIP__ /* See strip.py */ 187 | int homa_abort(int sockfd, __u64 id, int error); 188 | int homa_send(int sockfd, const void *message_buf, 189 | size_t length, const struct sockaddr *dest_addr, 190 | __u32 addrlen, __u64 *id, __u64 completion_cookie); 191 | int homa_sendv(int sockfd, const struct iovec *iov, 192 | int iovcnt, const struct sockaddr *dest_addr, 193 | __u32 addrlen, __u64 *id, __u64 completion_cookie); 194 | ssize_t homa_reply(int sockfd, const void *message_buf, 195 | size_t length, const struct sockaddr *dest_addr, 196 | __u32 addrlen, __u64 id); 197 | ssize_t homa_replyv(int sockfd, const struct iovec *iov, 198 | int iovcnt, const struct sockaddr *dest_addr, 199 | __u32 addrlen, __u64 id); 200 | #endif /* See strip.py */ 201 | 202 | #endif /* _UAPI_LINUX_HOMA_H */ 203 | -------------------------------------------------------------------------------- /homa_grant.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | 3 | /* This file contains definitions that related to generating grants. 
*/ 4 | 5 | #ifndef _HOMA_GRANT_H 6 | #define _HOMA_GRANT_H 7 | 8 | int homa_grantable_lock_slow(struct homa *homa, int recalc); 9 | void homa_grant_add_rpc(struct homa_rpc *rpc); 10 | void homa_grant_check_rpc(struct homa_rpc *rpc); 11 | void homa_grant_find_oldest(struct homa *homa); 12 | void homa_grant_free_rpc(struct homa_rpc *rpc); 13 | void homa_grant_log_tt(struct homa *homa); 14 | int homa_grant_outranks(struct homa_rpc *rpc1, 15 | struct homa_rpc *rpc2); 16 | int homa_grant_pick_rpcs(struct homa *homa, struct homa_rpc **rpcs, 17 | int max_rpcs); 18 | void homa_grant_pkt(struct sk_buff *skb, struct homa_rpc *rpc); 19 | void homa_grant_recalc(struct homa *homa, int locked); 20 | void homa_grant_remove_rpc(struct homa_rpc *rpc); 21 | int homa_grant_send(struct homa_rpc *rpc, struct homa *homa); 22 | int homa_grant_update_incoming(struct homa_rpc *rpc, 23 | struct homa *homa); 24 | 25 | /** 26 | * homa_grantable_lock() - Acquire the grantable lock. If the lock 27 | * isn't immediately available, record stats on the waiting time. 28 | * @homa: Overall data about the Homa protocol implementation. 29 | * @recalc: Nonzero means the caller is homa_grant_recalc; if another thread 30 | * is already recalculating, can return without waiting for the lock. 31 | * Return: Nonzero means this thread now owns the grantable lock. Zero 32 | * means the lock was not acquired and there is no need for this 33 | * thread to do the work of homa_grant_recalc because some other 34 | * thread started a fresh calculation after this method was invoked. 35 | */ 36 | static inline int homa_grantable_lock(struct homa *homa, int recalc) 37 | __acquires(&homa->grantable_lock) 38 | { 39 | int result; 40 | 41 | if (spin_trylock_bh(&homa->grantable_lock)) 42 | result = 1; 43 | else 44 | result = homa_grantable_lock_slow(homa, recalc); 45 | homa->grantable_lock_time = sched_clock(); 46 | return result; 47 | } 48 | 49 | /** 50 | * homa_grantable_unlock() - Release the grantable lock. 
51 | * @homa: Overall data about the Homa protocol implementation. 52 | */ 53 | static inline void homa_grantable_unlock(struct homa *homa) 54 | __releases(&homa->grantable_lock) 55 | { 56 | INC_METRIC(grantable_lock_ns, sched_clock() - 57 | homa->grantable_lock_time); 58 | spin_unlock_bh(&homa->grantable_lock); 59 | } 60 | 61 | #endif /* _HOMA_GRANT_H */ 62 | -------------------------------------------------------------------------------- /homa_offload.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | 3 | /* This file contains definitions related to homa_offload.c. */ 4 | 5 | #ifndef _HOMA_OFFLOAD_H 6 | #define _HOMA_OFFLOAD_H 7 | 8 | #include 9 | 10 | /** 11 | * struct homa_offload_core - Stores core-specific information used during 12 | * GRO operations. 13 | */ 14 | struct homa_offload_core { 15 | /** 16 | * @last_active: the last time (in sched_clock() units) that 17 | * there was system activity, such as NAPI or SoftIRQ, on this 18 | * core. Used for load balancing. 19 | */ 20 | __u64 last_active; 21 | 22 | /** 23 | * @last_gro: the last time (in sched_clock() units) that 24 | * homa_gro_receive returned on this core. Used to determine 25 | * whether GRO is keeping a core busy. 26 | */ 27 | __u64 last_gro; 28 | 29 | /** 30 | * @softirq_backlog: the number of batches of packets that have 31 | * been queued for SoftIRQ processing on this core but haven't 32 | * yet been processed. 33 | */ 34 | atomic_t softirq_backlog; 35 | 36 | /** 37 | * @softirq_offset: used when rotating SoftIRQ assignment among 38 | * the next cores; contains an offset to add to the current core 39 | * to produce the core for SoftIRQ. 40 | */ 41 | int softirq_offset; 42 | 43 | /** 44 | * @gen3_softirq_cores: when the Gen3 load balancer is in use, 45 | * GRO will arrange for SoftIRQ processing to occur on one of 46 | * these cores; -1 values are ignored (see balance.txt for more 47 | * on load balancing). 
This information is filled in via sysctl. 48 | */ 49 | #define NUM_GEN3_SOFTIRQ_CORES 3 50 | int gen3_softirq_cores[NUM_GEN3_SOFTIRQ_CORES]; 51 | 52 | /** 53 | * @last_app_active: the most recent time (sched_clock() units) 54 | * when an application was actively using Homa on this core (e.g., 55 | * by sending or receiving messages). Used for load balancing 56 | * (see balance.txt). 57 | */ 58 | __u64 last_app_active; 59 | 60 | /** 61 | * held_skb: last packet buffer known to be available for 62 | * merging other packets into on this core (note: may not still 63 | * be available), or NULL if none. 64 | */ 65 | struct sk_buff *held_skb; 66 | 67 | /** 68 | * @held_bucket: the index, within napi->gro_hash, of the list 69 | * containing @held_skb; undefined if @held_skb is NULL. Used to 70 | * verify that @held_skb is still available. 71 | */ 72 | int held_bucket; 73 | }; 74 | DECLARE_PER_CPU(struct homa_offload_core, homa_offload_core); 75 | 76 | int homa_gro_complete(struct sk_buff *skb, int thoff); 77 | void homa_gro_gen2(struct homa *homa, struct sk_buff *skb); 78 | void homa_gro_gen3(struct homa *homa, struct sk_buff *skb); 79 | void homa_gro_hook_tcp(void); 80 | void homa_gro_unhook_tcp(void); 81 | struct sk_buff *homa_gro_receive(struct list_head *gro_list, 82 | struct sk_buff *skb); 83 | struct sk_buff *homa_gso_segment(struct sk_buff *skb, 84 | netdev_features_t features); 85 | int homa_offload_end(void); 86 | int homa_offload_init(void); 87 | void homa_send_ipis(void); 88 | struct sk_buff *homa_tcp_gro_receive(struct list_head *held_list, 89 | struct sk_buff *skb); 90 | 91 | #endif /* _HOMA_OFFLOAD_H */ 92 | -------------------------------------------------------------------------------- /homa_pool.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | 3 | /* This file contains definitions used to manage user-space buffer pools. 
4 | */ 5 | 6 | #ifndef _HOMA_POOL_H 7 | #define _HOMA_POOL_H 8 | 9 | #include "homa_rpc.h" 10 | 11 | /** 12 | * struct homa_bpage - Contains information about a single page in 13 | * a buffer pool. 14 | */ 15 | struct homa_bpage { 16 | union { 17 | /** 18 | * @cache_line: Ensures that each homa_bpage object 19 | * is exactly one cache line long. 20 | */ 21 | char cache_line[L1_CACHE_BYTES]; 22 | struct { 23 | /** @lock: to synchronize shared access. */ 24 | spinlock_t lock; 25 | 26 | /** 27 | * @refs: Counts number of distinct uses of this 28 | * bpage (1 tick for each message that is using 29 | * this page, plus an additional tick if the @owner 30 | * field is set). 31 | */ 32 | atomic_t refs; 33 | 34 | /** 35 | * @owner: kernel core that currently owns this page 36 | * (< 0 if none). 37 | */ 38 | int owner; 39 | 40 | /** 41 | * @expiration: time (in sched_clock() units) after 42 | * which it's OK to steal this page from its current 43 | * owner (if @refs is 1). 44 | */ 45 | __u64 expiration; 46 | }; 47 | }; 48 | }; 49 | 50 | #ifndef __STRIP__ /* See strip.py */ 51 | _Static_assert(sizeof(struct homa_bpage) == L1_CACHE_BYTES, 52 | "homa_bpage overflowed a cache line"); 53 | #endif /* See strip.py */ 54 | 55 | /** 56 | * struct homa_pool_core - Holds core-specific data for a homa_pool (a bpage 57 | * out of which that core is allocating small chunks). 58 | */ 59 | struct homa_pool_core { 60 | union { 61 | /** 62 | * @cache_line: Ensures that each object is exactly one 63 | * cache line long. 64 | */ 65 | char cache_line[L1_CACHE_BYTES]; 66 | struct { 67 | /** 68 | * @page_hint: Index of bpage in pool->descriptors, 69 | * which may be owned by this core. If so, we'll use it 70 | * for allocating partial pages. 71 | */ 72 | int page_hint; 73 | 74 | /** 75 | * @allocated: if the page given by @page_hint is 76 | * owned by this core, this variable gives the number of 77 | * (initial) bytes that have already been allocated 78 | * from the page. 
79 | */ 80 | int allocated; 81 | 82 | /** 83 | * @next_candidate: when searching for free bpages, 84 | * check this index next. 85 | */ 86 | int next_candidate; 87 | }; 88 | }; 89 | }; 90 | 91 | #ifndef __STRIP__ /* See strip.py */ 92 | _Static_assert(sizeof(struct homa_pool_core) == L1_CACHE_BYTES, 93 | "homa_pool_core overflowed a cache line"); 94 | #endif /* See strip.py */ 95 | 96 | /** 97 | * struct homa_pool - Describes a pool of buffer space for incoming 98 | * messages for a particular socket; managed by homa_pool.c. The pool is 99 | * divided up into "bpages", which are a multiple of the hardware page size. 100 | * A bpage may be owned by a particular core so that it can more efficiently 101 | * allocate space for small messages. 102 | */ 103 | struct homa_pool { 104 | /** 105 | * @hsk: the socket that this pool belongs to. 106 | */ 107 | struct homa_sock *hsk; 108 | 109 | /** 110 | * @region: beginning of the pool's region (in the app's virtual 111 | * memory). Divided into bpages. 0 means the pool hasn't yet been 112 | * initialized. 113 | */ 114 | char __user *region; 115 | 116 | /** @num_bpages: total number of bpages in the pool. */ 117 | int num_bpages; 118 | 119 | /** @descriptors: kmalloced area containing one entry for each bpage. */ 120 | struct homa_bpage *descriptors; 121 | 122 | /** 123 | * @free_bpages: the number of pages still available for allocation 124 | * by homa_pool_get_pages. This equals the number of pages with zero 125 | * reference counts, minus the number of pages that have been claimed 126 | * by homa_pool_get_pages but not yet allocated. 127 | */ 128 | atomic_t free_bpages; 129 | 130 | /** 131 | * @bpages_needed: the number of free bpages required to satisfy the 132 | * needs of the first RPC on @hsk->waiting_for_bufs, or INT_MAX if 133 | * that queue is empty. 134 | */ 135 | int bpages_needed; 136 | 137 | /** @cores: core-specific info; dynamically allocated. 
*/ 138 | struct homa_pool_core *cores; 139 | 140 | /** @num_cores: number of elements in @cores. */ 141 | int num_cores; 142 | 143 | /** 144 | * @check_waiting_invoked: incremented during unit tests when 145 | * homa_pool_check_waiting is invoked. 146 | */ 147 | int check_waiting_invoked; 148 | }; 149 | 150 | int homa_pool_allocate(struct homa_rpc *rpc); 151 | void homa_pool_check_waiting(struct homa_pool *pool); 152 | void homa_pool_destroy(struct homa_pool *pool); 153 | void __user *homa_pool_get_buffer(struct homa_rpc *rpc, int offset, 154 | int *available); 155 | int homa_pool_get_pages(struct homa_pool *pool, int num_pages, 156 | __u32 *pages, int leave_locked); 157 | void homa_pool_get_rcvbuf(struct homa_sock *hsk, 158 | struct homa_rcvbuf_args *args); 159 | int homa_pool_init(struct homa_sock *hsk, void *buf_region, 160 | __u64 region_size); 161 | int homa_pool_release_buffers(struct homa_pool *pool, 162 | int num_buffers, __u32 *buffers); 163 | 164 | #endif /* _HOMA_POOL_H */ 165 | -------------------------------------------------------------------------------- /homa_receiver.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | #include 6 | 7 | #include "homa_receiver.h" 8 | 9 | /** 10 | * homa::receiver::receiver() - Constructor for receivers. 11 | * @fd: Homa socket from which this object will receive incoming 12 | * messages. The caller is responsible for setting up buffering 13 | * on the socket using setsockopt with the SO_HOMA_RCVBUF option. 14 | * The file descriptor must be valid for the lifetime of this 15 | * object. 16 | * @buf_region: Location of the buffer region that was allocated for 17 | * this socket. 
18 | */ 19 | homa::receiver::receiver(int fd, void *buf_region) 20 | : fd(fd) 21 | , hdr() 22 | , control() 23 | , source() 24 | , msg_length(-1) 25 | , buf_region(reinterpret_cast<char *>(buf_region)) 26 | { 27 | memset(&hdr, 0, sizeof(hdr)); 28 | hdr.msg_name = &source; 29 | hdr.msg_namelen = sizeof(source); 30 | hdr.msg_control = &control; 31 | hdr.msg_controllen = sizeof(control); 32 | 33 | memset(&control, 0, sizeof(control)); 34 | } 35 | 36 | /** 37 | * homa::receiver::~receiver() - Destructor for homa::receivers. The main purpose of 38 | * this destructor is to return any residual buffers to Homa. 39 | */ 40 | homa::receiver::~receiver() 41 | { 42 | release(); 43 | } 44 | 45 | /** 46 | * homa::receiver::copy_out() - Copy data out of the current message. 47 | * @dest: Data will be copied here; must have room for @count bytes. 48 | * @offset: Offset within the message of the first byte to copy. 49 | * @count: Number of bytes to copy; if the message doesn't contain 50 | * this many bytes starting at offset, then only the 51 | * available number of bytes will be copied. 52 | */ 53 | void homa::receiver::copy_out(void *dest, size_t offset, size_t count) const 54 | { 55 | char *cdest = static_cast<char *>(dest); 56 | ssize_t limit = offset + count; 57 | 58 | if (limit > msg_length) 59 | limit = msg_length; 60 | while (static_cast<ssize_t>(offset) < limit) { 61 | size_t chunk_size = contiguous(offset); 62 | /* contiguous() counts every byte left in the current bpage; clamp to the requested range so @dest is never overrun. */ if (chunk_size > static_cast<size_t>(limit) - offset) chunk_size = static_cast<size_t>(limit) - offset; 63 | memcpy(cdest, get<char>(offset), chunk_size); 64 | offset += chunk_size; 65 | cdest += chunk_size; 66 | } 67 | } 68 | 69 | /** 70 | * homa::receiver::receive() - Release resources for the current message, if 71 | * any, and receive a new incoming message. 72 | * @flags: Various OR'ed bits such as HOMA_RECVMSG_REQUEST and 73 | * HOMA_RECVMSG_NONBLOCKING. See the Homa documentation 74 | * for the flags field of recvmsg for details. 75 | * @id: Identifier of a particular RPC whose result is desired, 76 | * or 0. See the Homa documentation for the id field of 77 | * recvmsg for details. 
78 | * Return: The length of the new active message. If an error occurs, -1 79 | * is returned and additional information is available in 80 | * errno. Note: if id() returns a nonzero result after an 81 | * error, it means that that RPC has now completed with an error 82 | * and errno describes the nature of the error. 83 | */ 84 | size_t homa::receiver::receive(int flags, uint64_t id) 85 | { 86 | control.flags = flags; 87 | control.id = id; 88 | hdr.msg_namelen = sizeof(source); 89 | hdr.msg_controllen = sizeof(control); 90 | msg_length = recvmsg(fd, &hdr, 0); 91 | if (msg_length < 0) { 92 | control.num_bpages = 0; 93 | id = 0; 94 | } 95 | return msg_length; 96 | } 97 | 98 | /** 99 | * homa::receiver::release() - Release any resources associated with the 100 | * current message, if any. The current message must not be accessed again 101 | * until receive has returned successfully. 102 | */ 103 | void homa::receiver::release() 104 | { 105 | if (control.num_bpages == 0) 106 | return; 107 | 108 | /* This recvmsg request will do nothing except return buffer space. */ 109 | control.flags = HOMA_RECVMSG_NONBLOCKING; 110 | control.id = 0; 111 | recvmsg(fd, &hdr, 0); 112 | control.num_bpages = 0; 113 | msg_length = -1; 114 | } -------------------------------------------------------------------------------- /homa_receiver.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | extern "C" { 9 | #include "homa.h" 10 | } 11 | 12 | namespace homa { 13 | /* Helper class for receiving a series of messages from a Homa socket. This 14 | * class serves two purposes: first, it implements the application side of 15 | * the Homa buffer management protocol, returning receive buffer space to 16 | * Homa when the application longer needs it. 
Second, it provides convenience 17 | * methods for accessing messages that are scattered over several discontiguous 18 | * regions of buffer space. 19 | * 20 | * Typical usage: 21 | * - Call receive, which will invoke Homa to receive an incoming message. 22 | * - Access the message using methods such as get and copy_out (note: if 23 | * the message is shorter than HOMA_BPAGE_SIZE then it will be contiguous). 24 | * - Call receive to get the next message. This releases all of the resources 25 | * associated with the previous message, so you can no longer access that. 26 | * - Access the new message ... 27 | * 28 | * A single homa::receiver allows only a single active incoming message 29 | * at a time. However, you can create multiple homa::receivers for the 30 | * same Homa socket, each of which can have one active message. An 31 | * individual homa::receiver is not thread-safe. 32 | */ 33 | class receiver { 34 | public: 35 | receiver(int fd, void *buf_regio); 36 | ~receiver(); 37 | 38 | /** 39 | * homa::receiver::contiguous() - Return a count of the number 40 | * of contiguous bytes that are available in the current message 41 | * at a given offset. Zero is returned if there is no current message 42 | * or the offset is beyond the end of the message. 43 | * @offset: An offset from the beginning of the current message. 44 | */ 45 | inline size_t contiguous(size_t offset) const 46 | { 47 | if (static_cast(offset) >= msg_length) 48 | return 0; 49 | if ((offset >> HOMA_BPAGE_SHIFT) == (control.num_bpages - 1)) 50 | return msg_length - offset; 51 | return HOMA_BPAGE_SIZE - (offset & (HOMA_BPAGE_SIZE - 1)); 52 | } 53 | 54 | /** 55 | * homa::receiver::completion_cookie() - Return the completion 56 | * cookie associated with the current message; result is undefined 57 | * if there is no current message. 
58 | */ 59 | uint64_t completion_cookie(void) const 60 | { 61 | return control.completion_cookie; 62 | } 63 | 64 | void copy_out(void *dest, size_t offset, size_t count) const; 65 | 66 | /** 67 | * homa::receiver::get() - Make part of the current message 68 | * accessible. 69 | * @offset: Offset within the message of the first byte of an object 70 | * of type T 71 | * @storage: Pointer to a memory region containing at least sizeof(T) 72 | * bytes. If the desired object's bytes are not currently in 73 | * contiguous storage in the message, and if this argument 74 | * is non-null, information is copied out of the message 75 | * into this object so that it is contiguous. 76 | * Return: A pointer to the desired object (either in the message 77 | * or at *storage), or nullptr if the object could not be 78 | * returned (because it extended beyond the end of the 79 | * message, or it wasn't contiguous and storage was nullptr) 80 | */ 81 | template <typename T> 82 | inline T* get(size_t offset, T* storage = nullptr) const { 83 | int buf_num = offset >> HOMA_BPAGE_SHIFT; 84 | 85 | if (static_cast<ssize_t>(offset + sizeof(T)) > msg_length) 86 | return nullptr; 87 | if (contiguous(offset) >= sizeof(T)) 88 | return reinterpret_cast<T*>(buf_region 89 | + control.bpage_offsets[buf_num] 90 | + (offset & (HOMA_BPAGE_SIZE - 1))); 91 | if (storage) 92 | copy_out(storage, offset, sizeof(T)); 93 | return storage; 94 | } 95 | 96 | /** 97 | * homa::receiver::id() - Return the Homa RPC identifier for the current message, 98 | * or 0 if there is no current message. 99 | */ 100 | inline uint64_t id(void) const 101 | { 102 | return control.id; 103 | } 104 | 105 | /** 106 | * homa::receiver::is_request() - Return true if the current message 107 | * is a request, and false if it is a response or if there is no 108 | * current message. 
109 | */ 110 | bool is_request(void) const 111 | { 112 | return control.id & 1; 113 | } 114 | 115 | /** 116 | * homa::receiver::length() - Return the total number of bytes in the 117 | * current message, or a negative value if there is no current 118 | * message. 119 | */ 120 | ssize_t length(void) const 121 | { 122 | return msg_length; 123 | } 124 | 125 | size_t receive(int flags, uint64_t id); 126 | void release(void); 127 | 128 | /** 129 | * homa::receiver::src_addr() - Return a pointer to the address 130 | * of the sender of the current message. The result is undefined 131 | * if there is no current message. 132 | */ 133 | const struct sockaddr *src_addr(void) const 134 | { 135 | return &source.sa; 136 | } 137 | 138 | protected: 139 | /** @fd: File descriptor for an open Homa socket. */ 140 | int fd; 141 | 142 | /** @hdr: Used to pass information to the recvmsg system call. */ 143 | struct msghdr hdr; 144 | 145 | /** 146 | * @control: Additional Homa-specific information passed to the 147 | * recvmsg system call through hdr.msg_control. Note: if 148 | * num_bpages != 0, it means this contains buffers from a previous 149 | * message that must be returned to Homa. 150 | */ 151 | struct homa_recvmsg_args control; 152 | 153 | /** @source: Address of the node that sent the current message. */ 154 | union { 155 | struct sockaddr sa; 156 | struct sockaddr_in in4; 157 | struct sockaddr_in6 in6; 158 | } source; 159 | 160 | /** @msg_length: Length of the current message, or < 0 if none. */ 161 | ssize_t msg_length; 162 | 163 | /** @buf_region: First byte of the buffer region allocated for this socket. 
*/ 164 | char *buf_region; 165 | }; 166 | 167 | } // namespace homa 168 | -------------------------------------------------------------------------------- /homa_skb.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | 3 | /* This file contains definitions related to efficient management of 4 | * memory associated with transmit sk_buffs. 5 | */ 6 | 7 | #ifndef _HOMA_SKB_H 8 | #define _HOMA_SKB_H 9 | 10 | #include 11 | 12 | /** 13 | * define HOMA_SKB_PAGE_ORDER - exponent (power of two) determining how 14 | * many pages to allocate in a high-order page for skb pages (e.g., 15 | * 2 means allocate in units of 4 pages). 16 | */ 17 | #define HOMA_SKB_PAGE_ORDER 4 18 | 19 | /** 20 | * define HOMA_SKB_PAGE_SIZE - number of bytes corresponding to HOMA_SKB_PAGE_ORDER. 21 | */ 22 | #define HOMA_SKB_PAGE_SIZE (PAGE_SIZE << HOMA_SKB_PAGE_ORDER) 23 | 24 | /** 25 | * struct homa_page_pool - A cache of free pages available for use in tx skbs. 26 | * Each page is of size HOMA_SKB_PAGE_SIZE, and a pool is dedicated for 27 | * use by a single NUMA node. Access to these objects is synchronized with 28 | * @homa->page_pool_mutex. 29 | */ 30 | struct homa_page_pool { 31 | /** @avail: Number of free pages currently in the pool. */ 32 | int avail; 33 | 34 | /** 35 | * @low_mark: Low water mark: smallest value of avail since the 36 | * last time homa_skb_release_pages reset it. 37 | */ 38 | int low_mark; 39 | 40 | #define HOMA_PAGE_POOL_SIZE 1000 41 | 42 | /** 43 | * @pages: Pointers to pages that are currently free; the ref count 44 | * is 1 in each of these pages. 45 | */ 46 | struct page *pages[HOMA_PAGE_POOL_SIZE]; 47 | }; 48 | 49 | /** 50 | * struct homa_skb_core - Stores core-specific information related to 51 | * sk_buff allocation. All values are assumed to be zero initially. 
52 | */ 53 | struct homa_skb_core { 54 | /** 55 | * @pool: NUMA-specific page pool from which to allocate skb pages 56 | * for this core. 57 | */ 58 | struct homa_page_pool *pool; 59 | 60 | /** 61 | * @skb_page: a page of data available being used for skb frags. 62 | * This pointer is included in the page's reference count. 63 | */ 64 | struct page *skb_page; 65 | 66 | /** 67 | * @page_inuse: offset of first byte in @skb_page that hasn't already 68 | * been allocated. 69 | */ 70 | int page_inuse; 71 | 72 | /** @page_size: total number of bytes available in @skb_page. */ 73 | int page_size; 74 | 75 | /* Maximum number of stashed pages that can be consumed by a message 76 | * of a given size (assumes page_inuse is 0). This is a rough guess, 77 | * since it doesn't consider all of the data_segments that will be 78 | * needed for the packets. 79 | */ 80 | #define HOMA_MAX_STASHED(size) ((((size) - 1) / HOMA_SKB_PAGE_SIZE) + 1) 81 | 82 | /** 83 | * @num_stashed_pages: number of pages currently available in 84 | * stashed_pages. 85 | */ 86 | int num_stashed_pages; 87 | 88 | /** 89 | * @stashed_pages: use to prefetch from the cache all of the pages a 90 | * message will need with a single operation, to avoid having to 91 | * synchronize separately for each page. Note: these pages are all 92 | * HOMA_SKB_PAGE_SIZE in length. 
93 | */ 94 | struct page *stashed_pages[HOMA_MAX_STASHED(HOMA_MAX_MESSAGE_LENGTH)]; 95 | }; 96 | DECLARE_PER_CPU(struct homa_skb_core, homa_skb_core); 97 | 98 | int homa_skb_append_from_iter(struct homa *homa, 99 | struct sk_buff *skb, struct iov_iter *iter, 100 | int length); 101 | int homa_skb_append_from_skb(struct homa *homa, 102 | struct sk_buff *dst_skb, 103 | struct sk_buff *src_skb, int offset, 104 | int length); 105 | int homa_skb_append_to_frag(struct homa *homa, struct sk_buff *skb, 106 | void *buf, int length); 107 | void homa_skb_cache_pages(struct homa *homa, struct page **pages, 108 | int count); 109 | void homa_skb_cleanup(struct homa *homa); 110 | void *homa_skb_extend_frags(struct homa *homa, struct sk_buff *skb, 111 | int *length); 112 | void homa_skb_free_tx(struct homa *homa, struct sk_buff *skb); 113 | void homa_skb_free_many_tx(struct homa *homa, struct sk_buff **skbs, 114 | int count); 115 | void homa_skb_get(struct sk_buff *skb, void *dest, int offset, 116 | int length); 117 | int homa_skb_init(struct homa *homa); 118 | struct sk_buff *homa_skb_new_tx(int length); 119 | bool homa_skb_page_alloc(struct homa *homa, 120 | struct homa_skb_core *core); 121 | void homa_skb_release_pages(struct homa *homa); 122 | void homa_skb_stash_pages(struct homa *homa, int length); 123 | 124 | #endif /* _HOMA_SKB_H */ 125 | -------------------------------------------------------------------------------- /homa_stub.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | 3 | /* This file contains stripped-down replacements that have been 4 | * temporarily removed from Homa during the Linux upstreaming 5 | * process. By the time upstreaming is complete this file will 6 | * have gone away. 
7 | */ 8 | 9 | #ifndef _HOMA_STUB_H 10 | #define _HOMA_STUB_H 11 | 12 | #include "homa_impl.h" 13 | 14 | static inline int homa_skb_append_from_iter(struct homa *homa, 15 | struct sk_buff *skb, 16 | struct iov_iter *iter, int length) 17 | { 18 | char *dst = skb_put(skb, length); 19 | 20 | if (copy_from_iter(dst, length, iter) != length) 21 | return -EFAULT; 22 | return 0; 23 | } 24 | 25 | static inline int homa_skb_append_to_frag(struct homa *homa, 26 | struct sk_buff *skb, void *buf, 27 | int length) 28 | { 29 | char *dst = skb_put(skb, length); 30 | 31 | memcpy(dst, buf, length); 32 | return 0; 33 | } 34 | 35 | static inline int homa_skb_append_from_skb(struct homa *homa, 36 | struct sk_buff *dst_skb, 37 | struct sk_buff *src_skb, 38 | int offset, int length) 39 | { 40 | return homa_skb_append_to_frag(homa, dst_skb, 41 | skb_transport_header(src_skb) + offset, length); 42 | } 43 | 44 | static inline void homa_skb_free_tx(struct homa *homa, struct sk_buff *skb) 45 | { 46 | kfree_skb(skb); 47 | } 48 | 49 | static inline void homa_skb_free_many_tx(struct homa *homa, 50 | struct sk_buff **skbs, int count) 51 | { 52 | int i; 53 | 54 | for (i = 0; i < count; i++) 55 | kfree_skb(skbs[i]); 56 | } 57 | 58 | static inline void homa_skb_get(struct sk_buff *skb, void *dest, int offset, 59 | int length) 60 | { 61 | memcpy(dest, skb_transport_header(skb) + offset, length); 62 | } 63 | 64 | static inline struct sk_buff *homa_skb_new_tx(int length) 65 | { 66 | struct sk_buff *skb; 67 | 68 | skb = alloc_skb(HOMA_SKB_EXTRA + HOMA_IPV6_HEADER_LENGTH + 69 | sizeof(struct homa_skb_info) + length, 70 | GFP_KERNEL); 71 | if (likely(skb)) { 72 | skb_reserve(skb, HOMA_SKB_EXTRA + HOMA_IPV6_HEADER_LENGTH); 73 | skb_reset_transport_header(skb); 74 | } 75 | return skb; 76 | } 77 | 78 | static inline void homa_skb_stash_pages(struct homa *homa, int length) 79 | {} 80 | 81 | #endif /* _HOMA_STUB_H */ 82 | -------------------------------------------------------------------------------- 
/man/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile to build man pages for Homa. 2 | 3 | SRCS := homa.7 \ 4 | homa_abort.3 \ 5 | homa_reply.3 \ 6 | homa_send.3 \ 7 | recvmsg.2 \ 8 | sendmsg.2 9 | 10 | PDFS := $(patsubst %.2,%.pdf,$(SRCS)) 11 | PDFS := $(patsubst %.3,%.pdf,$(PDFS)) 12 | PDFS := $(patsubst %.7,%.pdf,$(PDFS)) 13 | all: $(PDFS) 14 | 15 | clean: 16 | rm -f *.pdf 17 | 18 | # Note: in the rules below, it doesn't seem to work to eliminate the 19 | # temporary file and use ps2pdf in a pipeline; as of 12/2024, under 20 | # Cygwin, this produces blank output for some man pages under some 21 | # conditions. 22 | %.pdf: %.2 23 | groff -man -Tps $< > tmp.ps 24 | ps2pdf tmp.ps $@ 25 | rm tmp.ps 26 | 27 | %.pdf: %.3 28 | groff -man -Tps $< > tmp.ps 29 | ps2pdf tmp.ps $@ 30 | rm tmp.ps 31 | 32 | %.pdf: %.7 33 | groff -man -Tps $< > tmp.ps 34 | ps2pdf tmp.ps $@ 35 | rm tmp.ps 36 | 37 | # The following target is useful for debugging Makefiles; it 38 | # prints the value of a make variable. 39 | print-%: 40 | @echo $* = $($*) -------------------------------------------------------------------------------- /man/homa_abort.3: -------------------------------------------------------------------------------- 1 | .TH HOMA_ABORT 3 2022-9-15 "Homa" "Linux Programmer's Manual" 2 | .SH NAME 3 | homa_abort \- terminate an outgoing RPC 4 | .SH SYNOPSIS 5 | .nf 6 | .B #include 7 | .PP 8 | .BI "int homa_abort(int " sockfd ", uint64_t " id ", int " error ); 9 | .PP 10 | .BI "int homa_abortp(int " sockfd ", struct homa_abort_args *" args ); 11 | .fi 12 | .SH DESCRIPTION 13 | These two functions will cancel the execution of one (or all) outgoing RPCs. 
14 | They behave identically except that 15 | .BR homa_abort 16 | receives its arguments as separate parameters, whereas 17 | .BR homa_abortp 18 | packs all of the arguments into a structure: 19 | .PP 20 | .in +4n 21 | .ps -1 22 | .vs -2 23 | .EX 24 | struct homa_abort_args { 25 | uint64_t id; 26 | int error; 27 | }; 28 | .EE 29 | .vs +2 30 | .ps +1 31 | .in 32 | .PP 33 | The 34 | .I id 35 | argument contains the identifier for an RPC; if this RPC is active on 36 | .IR sockfd 37 | then it is aborted. 38 | If 39 | .I id 40 | is 0 then all outgoing RPCs on 41 | .IR sockfd 42 | will be aborted. 43 | If 44 | .I error 45 | is 0, then the matching RPCs will be deleted and all state associated 46 | with them will be freed (the RPCs will not 47 | be returned by 48 | .BR homa_recv ). 49 | If 50 | .I error 51 | is nonzero, then the RPC(s) will immediately be placed in the completed 52 | state so that they can be returned by 53 | .BR homa_recv ; 54 | the 55 | .BR homa_recv 56 | call will return an error, with an 57 | .I errno 58 | value of 59 | .I error. 60 | Regardless of whether the RPC(s) are completed or freed, the 61 | servers for the RPCs 62 | are not notified of the abort. If a 63 | request has already been transmitted to the server at the time 64 | .B homa_abort 65 | is invoked, it may still be executed on the server. Any response 66 | from the server will be discarded. 67 | 68 | .SH RETURN VALUE 69 | On success, the return value is 0. 70 | On error, \-1 is returned and 71 | .I errno 72 | is set appropriately. 73 | 74 | .SH ERRORS 75 | .TP 76 | .B EALREADY 77 | .I error 78 | and 79 | .I id 80 | were both nonzero, but the RPC was already in the completed state. In this 81 | case the system call has no effect. 82 | .TP 83 | .B EFAULT 84 | An invalid user space address was specified for an argument. 85 | .TP 86 | .B EINVAL 87 | There is no RPC corresponding to 88 | .IR id . 
89 | .SH SEE ALSO 90 | .BR homa_recv (3), 91 | .BR homa_reply (3), 92 | .BR homa_send (3), 93 | .BR homa (7) 94 | -------------------------------------------------------------------------------- /man/homa_reply.3: -------------------------------------------------------------------------------- 1 | .TH HOMA_REPLY 3 2024-11-11 "Homa" "Linux Programmer's Manual" 2 | .SH NAME 3 | homa_reply, homa_replyv \- send a Homa response message 4 | .SH SYNOPSIS 5 | .nf 6 | .B #include 7 | .PP 8 | .BI "int homa_reply(int " sockfd ", const void *" message_buf ", size_t " \ 9 | length , 10 | .BI " const struct sockaddr *" dest_addr ", size_t " \ 11 | addrlen , 12 | .BI " uint64_t " id ); 13 | .PP 14 | .BI "int homa_replyv(int " sockfd ", const struct iovec *" iov ", size_t " \ 15 | iovcnt , 16 | .BI " const struct sockaddr *" dest_addr ", size_t " \ 17 | addrlen , 18 | .BI " uint64_t " id ); 19 | .fi 20 | .SH DESCRIPTION 21 | .BR homa_reply 22 | and 23 | .BR homa_replyv 24 | are convenience functions layered on top of the 25 | .B sendmsg 26 | system call. 27 | Either may be used to transmit a response message using the Homa 28 | transport protocol. 29 | The argument 30 | .I sockfd 31 | is the file descriptor of a Homa socket to use for sending the response. 32 | With 33 | .BR homa_reply 34 | the response message is stored in a single contiguous buffer pointed to by 35 | .IR message_buf , 36 | and the argument 37 | .I length 38 | gives the length of the message in bytes. 39 | With 40 | .BR homa_replyv 41 | the response message consists of multiple disjoint chunks, specified 42 | by 43 | .I iovcnt 44 | descriptors at 45 | .IR iov . 46 | In either case the total message length must not exceed 47 | .BR HOMA_MAX_MESSAGE_LENGTH . 48 | The destination for the response is given by 49 | .IR dest_addr , 50 | which can hold either an IPv4 or an IPv6 address. The length 51 | of the address is given by 52 | .IR addrlen .
53 | The argument 54 | .I id 55 | is an identifier previously returned by 56 | .BR recvmsg (2); 57 | along with 58 | .IR dest_addr , 59 | it identifies the request for which this message is the response. 60 | .PP 61 | This function returns as soon as the response has been queued for 62 | transmission. 63 | .SH RETURN VALUE 64 | On success, the return value is 0. 65 | On error, \-1 is returned and 66 | .I errno 67 | is set appropriately. 68 | .SH ERRORS 69 | See 70 | .BR sendmsg (2) 71 | for details on the 72 | .I errno 73 | values returned after errors. 74 | .SH SEE ALSO 75 | .BR recvmsg (2), 76 | .BR sendmsg (2), 77 | .BR homa_abort (3), 78 | .BR homa_send (3), 79 | .BR homa (7) 80 | -------------------------------------------------------------------------------- /man/homa_send.3: -------------------------------------------------------------------------------- 1 | .TH HOMA_SEND 3 2024-11-11 "Homa" "Linux Programmer's Manual" 2 | .SH NAME 3 | homa_send, homa_sendv \- send a request message 4 | .SH SYNOPSIS 5 | .nf 6 | .B #include 7 | .PP 8 | .BI "int homa_send(int " sockfd ", const void *" message_buf ", size_t " length \ 9 | ", const struct sockaddr *" dest_addr ", 10 | .BI " size_t " addrlen ", uint64_t *" id ", uint64_t " \ 11 | "completion_cookie" ); 12 | .PP 13 | .BI "int homa_sendv(int " sockfd ", const struct iovec *" iov ", size_t " \ 14 | iovcnt ", const sockaddr *" dest_addr , 15 | .BI " size_t " addrlen ", uint64_t *" id ", uint64_t " \ 16 | "completion_cookie" ); 17 | .fi 18 | .SH DESCRIPTION 19 | .BR homa_send 20 | and 21 | .BR homa_sendv 22 | are convenience functions layered on top of the 23 | .B sendmsg 24 | system call. 25 | Either may be used to transmit a request message using the Homa 26 | transport protocol. 27 | The argument 28 | .I sockfd 29 | is the file descriptor of the sending socket; this must be a Homa socket. 
30 | With 31 | .BR homa_send 32 | the request message is stored in a single contiguous buffer pointed to by 33 | .IR message_buf , 34 | and the argument 35 | .I length 36 | gives the length of the message in bytes. 37 | With 38 | .BR homa_sendv 39 | the request message consists of multiple disjoint chunks, specified 40 | by 41 | .I iovcnt 42 | descriptors at 43 | .IR iov . 44 | In either case, the total message length must not exceed 45 | .BR HOMA_MAX_MESSAGE_LENGTH . 46 | The destination socket for the request is given by 47 | .IR dest_addr , 48 | which can hold either an IPv4 or IPv6 address. The length of 49 | the address is given by 50 | .IR addrlen . 51 | If 52 | .I id 53 | is not NULL, an identifier for the request is returned at 54 | .IR *id. 55 | The identifier will be unique among all requests issued on 56 | .IR sockfd , 57 | and can be used to match the request with a response returned later by 58 | .BR homa_reply (3). 59 | The 60 | .I completion_cookie 61 | argument provides application-specific identifying information about the RPC, 62 | such as the address of a data structure used to manage the 63 | RPC; it will be returned by 64 | .BR homa_recv 65 | when the RPC completes. 66 | .PP 67 | This function returns as soon as the message has been queued for 68 | transmission. 69 | 70 | .SH RETURN VALUE 71 | On success, the return value is 0 and an identifier for the request 72 | is stored in 73 | .I *id 74 | (if 75 | .I id 76 | is not NULL). 77 | The identifier can be used later to match the request 78 | with the corresponding response, using 79 | .BR homa_reply (3). 80 | On error, \-1 is returned and 81 | .I errno 82 | is set appropriately. 83 | .SH ERRORS 84 | After an error return, 85 | .I errno 86 | will contain additional information about the cause of the error. 87 | See 88 | .BR sendmsg (2) 89 | for details. 
90 | .SH SEE ALSO 91 | .BR recvmsg (2), 92 | .BR sendmsg (2), 93 | .BR homa_abort (3), 94 | .BR homa_reply (3), 95 | .BR homa (7) 96 | -------------------------------------------------------------------------------- /man/sendmsg.2: -------------------------------------------------------------------------------- 1 | .TH SENDMSG 2 2023-11-2 "Homa" "Linux Programmer's Manual" 2 | .SH NAME 3 | sendmsg \- send a Homa request or response message 4 | .SH SYNOPSIS 5 | .nf 6 | .B #include 7 | .PP 8 | .BI "ssize_t sendmsg(int " sockfd ", const struct msghdr *" msg ", int " flags ); 9 | .fi 10 | .SH DESCRIPTION 11 | The 12 | .B sendmsg 13 | kernel call is used to send request and response messages on Homa sockets. 14 | The 15 | .I sockfd 16 | argument must refer to a Homa socket. The 17 | .I msg 18 | argument describes the message to send and the destination where it 19 | should be sent (more details below). The 20 | .I flags 21 | argument is not used for Homa messages. 22 | .PP 23 | The 24 | .B msg 25 | argument must point to a structure of the following type: 26 | .PP 27 | .in +4n 28 | .ps -1 29 | .vs -2 30 | .EX 31 | struct msghdr { 32 | void *msg_name; /* Address of destination (sockaddr_in 33 | * or sockaddr_in6). */ 34 | socklen_t msg_namelen; /* Size of address. */ 35 | struct iovec *msg_iov; /* Message contents: one or more extents. */ 36 | size_t msg_iovlen; /* Number of elements in msg_iov. */ 37 | void *msg_control; /* Address of homa_sendmsg_args struct. */ 38 | size_t msg_controllen; /* Must always be zero (if not, sendmsg will 39 | * fail with EINVAL, for arcane reasons). */ 40 | int msg_flags; /* Not used by Homa.
*/ 41 | }; 42 | .EE 43 | .vs +2 44 | .ps +1 45 | .in 46 | .PP 47 | Homa requires additional information beyond what can be represented in a 48 | .BR "struct msghdr" , 49 | so the 50 | .B msg_control 51 | field must refer to a structure of the following type: 52 | .PP 53 | .in +4n 54 | .ps -1 55 | .vs -2 56 | .EX 57 | struct homa_sendmsg_args { 58 | uint64_t id; /* RPC identifier. */ 59 | uint64_t completion_cookie; /* For requests only; value to return 60 | * along with response. */ 61 | }; 62 | .EE 63 | .vs +2 64 | .ps +1 65 | .in 66 | .PP 67 | If the 68 | .B id 69 | field of the 70 | .B homa_sendmsg_args 71 | is initially 0, then a new RPC will be created and a request message 72 | will be sent as described by 73 | .I msg\c 74 | ->\c 75 | .B msg_name 76 | and 77 | .IR msg ->\c 78 | .BR msg_iov ; 79 | the 80 | .B id 81 | field will be overwritten with the identifier of the new RPC, which is 82 | unique among all RPCs initiated via 83 | .IR sockfd . 84 | This identifier can be used to wait for the response with 85 | .BR recvmsg . 86 | If 87 | .B id 88 | is nonzero, then a response message will be sent for the RPC identified 89 | uniquely by 90 | .B id 91 | and 92 | .IR msg ->\c 93 | .BR msg_name . 94 | .PP 95 | .B sendmsg 96 | returns as soon as the message has been queued for transmission. 97 | .SH RETURN VALUE 98 | The return value is 0 for success and -1 if an error occurred. 99 | .SH ERRORS 100 | .PP 101 | When 102 | .B sendmsg 103 | returns an error, 104 | .I errno 105 | will have one of the following values: 106 | .TP 107 | .B EAFNOSUPPORT 108 | The address family specified by 109 | .I dest_addr 110 | does not match the address family specified when the socket was opened 111 | .RB ( AF_INET 112 | or 113 | .BR AF_INET6 ). 114 | .TP 115 | .B EBADF 116 | .I sockfd 117 | is not a valid open file descriptor. 118 | .TP 119 | .B EFAULT 120 | An invalid user space address was specified for an argument. 
121 | .TP 122 | .B EINVAL 123 | The size of the request exceeded 124 | .B HOMA_MAX_MESSAGE_LENGTH, or 125 | .I sockfd 126 | was not a Homa socket, or a nonzero completion cookie was specified 127 | for a response message, or the 128 | .B id 129 | for a response message does not match an existing RPC for which a 130 | request message has been received. 131 | .TP 132 | .B ENOMEM 133 | Memory could not be allocated for internal data structures needed 134 | for the message. 135 | .TP 136 | .B ESHUTDOWN 137 | The socket has been disabled using 138 | .BR shutdown (2). 139 | .SH SEE ALSO 140 | .BR recvmsg (2), 141 | .BR homa_abort (3), 142 | .BR homa_reply (3), 143 | .BR homa_send (3), 144 | .BR homa (7) 145 | -------------------------------------------------------------------------------- /perf/README.md: -------------------------------------------------------------------------------- 1 | This directory contains various performance measurements of the 2 | Linux kernel implementation of the Homa transport protocol. 3 | -------------------------------------------------------------------------------- /perf/plot_length_cdf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019-2020 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This script generates a plot showing the CDF of message lengths, 7 | # gathered from one or more experiment runs. 8 | # 9 | # Usage: plot_length_cdf.py name pattern name pattern ... 10 | # 11 | # Each "name" argument gives the name of a workload, which will appear in the 12 | # graph keys. Each "pattern" argument gives a glob string (such as 13 | # "logs/w1/loaded*.txt") describing one or more data files that measure 14 | # the distribution.
15 | 16 | import glob 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | import os 20 | import string 21 | import sys 22 | 23 | # Keys are message lengths, values are number of messages of that length. 24 | counts = {} 25 | 26 | def read_rtts(file, column): 27 | """ 28 | Read file and add its data to the counts array. The "column" argument 29 | indicates which argument of each line contains the message length. 30 | """ 31 | global counts 32 | 33 | print("Reading %s" % file) 34 | f = open(file, "r") 35 | for line in f: 36 | stripped = line.strip() 37 | if stripped[0] == '#': 38 | continue 39 | words = stripped.split() 40 | if (len(words) < (column+1)): 41 | print("Line too short (no column %d): '%s'" % (line, column)) 42 | continue 43 | size = int(words[column]) 44 | if size in counts: 45 | counts[size] += 1 46 | else: 47 | counts[size] = 1 48 | f.close() 49 | 50 | if (len(sys.argv) < 3) or not (len(sys.argv) & 1): 51 | print("Usage: %s name pattern name pattern ..." % (sys.argv[0])) 52 | exit(1) 53 | 54 | workloads = [] 55 | for i in range(1, len(sys.argv), 2): 56 | info = {} 57 | info["name"] = sys.argv[i] 58 | pattern = sys.argv[i+1] 59 | 60 | counts = {} 61 | got_data = False 62 | for f in glob.glob(pattern): 63 | read_rtts(f, 0) 64 | got_data = True 65 | if not got_data: 66 | print("Couldn't find any files corresponding to '%s'" % (pattern)) 67 | continue 68 | 69 | info["total_msgs"] = 0.0 70 | info["total_bytes"] = 0.0 71 | 72 | for length in counts: 73 | info["total_msgs"] += counts[length] 74 | info["total_bytes"] += length*counts[length] 75 | 76 | lengths = sorted(counts.keys()) 77 | messages = 0 78 | bytes = 0 79 | info["x"] = [] 80 | info["cum_msgs"] = [] 81 | info["cum_bytes"] = [] 82 | for l in lengths: 83 | info["x"].append(l) 84 | info["cum_msgs"].append(messages) 85 | info["cum_bytes"].append(bytes) 86 | messages += counts[l]/info["total_msgs"] 87 | bytes += (l * counts[l])/info["total_bytes"] 88 | info["x"].append(l) 89 | 
info["cum_msgs"].append(messages) 90 | info["cum_bytes"].append(bytes) 91 | # print("Length %d, CF messages %.2f, CF bytes %.2f" % ( 92 | # l, messages, bytes)) 93 | workloads.append(info) 94 | 95 | plt.subplot(211) 96 | plt.axis([10, 1500000, 0, 1.0]) 97 | plt.xscale("log") 98 | plt.xlabel("Message Length") 99 | plt.ylabel("Cum. Frac. Messages") 100 | plt.grid(which="major", axis="both") 101 | 102 | for w in workloads: 103 | plt.plot(w["x"], w["cum_msgs"], label=w["name"]) 104 | plt.legend() 105 | 106 | plt.subplot(212) 107 | plt.axis([10, 1500000, 0, 1.0]) 108 | plt.xscale("log") 109 | plt.xlabel("Message Length") 110 | plt.ylabel("Cum. Frac. Bytes") 111 | plt.grid(which="major", axis="both") 112 | 113 | for w in workloads: 114 | print("Plotting workload %s" % (w["name"])) 115 | plt.plot(w["x"], w["cum_bytes"], label=w["name"]) 116 | plt.legend() 117 | 118 | plt.savefig('length.pdf') -------------------------------------------------------------------------------- /perf/rtt.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PlatformLab/HomaModule/c4f579f9a83728baeb638a6f11c15e8f0ddf65d8/perf/rtt.xlsx -------------------------------------------------------------------------------- /reap.txt: -------------------------------------------------------------------------------- 1 | This file discusses issues related to freeing resources for completed RPCs 2 | ("reaping"). 3 | 4 | * Most of the cost of reaping comes from freeing skbuffs; this can be 5 | quite expensive for RPCs with long messages. 6 | 7 | * The natural time to reap is when homa_rpc_free is invoked to mark an 8 | RPC completed, but this can result in severe performance hiccups. For 9 | example, a server RPC is freed once the last packet of the response 10 | has been transmitted, but this can happen in homa_softirq in response 11 | to an incoming grant, and there may be other short messages waiting 12 | to be processed. 
Freeing a long RPC could result in significant delay 13 | for a subsequent short RPC. 14 | 15 | * Thus Homa doesn't reap immediately in homa_rpc_free. Instead, dead RPCs 16 | are queued up and reaping occurs later, at a more convenient time where 17 | it is less likely to impact latency. The challenge is to figure out how to 18 | do this so that (a) we keep up with dead RPCs and (b) we minimize 19 | the impact of reaping on latency. 20 | 21 | * The ideal time to reap is when threads are waiting for incoming messages 22 | in homa_wait_for_message. The thread has nothing else to do, so reaping 23 | can be performed with no latency impact on the application. However, 24 | if a machine is overloaded then it may never wait, so this mechanism 25 | isn't always sufficient. 26 | 27 | * Homa now reaps in two other places, if homa_wait_for_message can't 28 | keep up: 29 | * If dead_buffs_limit dead skbs accumulate, then homa_timer will 30 | reap to get down to that limit. However, it seems possible that 31 | there may be cases where a single thread cannot keep up with all 32 | the reaping to be done. 33 | * If homa_timer can't keep up, then as a last resort, homa_pkt_dispatch 34 | will reap a few buffers for every incoming data packet. This is undesirable 35 | because it will impact Homa's performance. 36 | 37 | * In addition, during the conversion to the new input buffering scheme for 2.0, 38 | freeing of packets for incoming messages was moved to homa_copy_to_user, 39 | under the assumption that this code wouldn't be on the critical path. 40 | However, right now the packet freeing is taking 20-25% of the total 41 | time in that function, and with faster networks it's quite possible that 42 | this code will indeed be on the critical path. So, it probably shouldn't 43 | be doing packet freeing after all. 
44 | 45 | * Here are some approaches that have been tried and eventually abandoned: 46 | * Occasionally when data packets arrive, reap if too much dead info has 47 | accumulated. This will cause a latency impact. The amount to reap is 48 | chosen dynamically (by homa_timer) to be as small as possible while 49 | gradually working through the backlog. Unfortunately, the formula for 50 | computing how much to reap was fragile and resulted in situations where 51 | the backlog of dead RPCs grew without bound. This approach was abandoned 52 | in October 2021. 53 | -------------------------------------------------------------------------------- /rsync-exclude.txt: -------------------------------------------------------------------------------- 1 | # This file lists directories and files that should not be copied 2 | # to rcmaster by rsync. 3 | .git 4 | nbproject 5 | private 6 | cloudlab 7 | reports 8 | *traces* 9 | bytedance 10 | mle 11 | __pycache__ 12 | *.data 13 | *.pyc 14 | *.pdf 15 | *.tt 16 | *.log 17 | tmp* 18 | *.out 19 | erfan 20 | -------------------------------------------------------------------------------- /sync.txt: -------------------------------------------------------------------------------- 1 | This file describes the synchronization strategy used for Homa. 2 | 3 | * In the Linux TCP/IP stack, the primary locking mechanism is a lock 4 | per socket. However, per-socket locks aren't adequate for Homa, because 5 | sockets are "larger" in Homa. In TCP, a socket corresponds to a single 6 | connection between the source and destination; an application can have 7 | hundreds or thousands of sockets open at once, so per-socket locks leave 8 | lots of opportunities for concurrency. With Homa, a single socket can be 9 | used for communicating with any number of peers, so there will typically 10 | be no more than one socket per thread. 
As a result, a single Homa socket 11 | must support many concurrent RPCs efficiently, and a per-socket lock would 12 | create a bottleneck (Homa tried this approach initially). 13 | 14 | * Thus, the primary lock used in Homa is a per-RPC spinlock. This allows operations 15 | on different RPCs to proceed concurrently. RPC locks are actually stored in 16 | the hash table buckets used to look them up. This is important because it 17 | makes looking up RPCs and locking them atomic. Without this approach it 18 | is possible that an RPC could get deleted after it was looked up but before 19 | it was locked. 20 | 21 | * Certain operations are not permitted while holding spinlocks, such as memory 22 | allocation and copying data to/from user space (spinlocks disable 23 | interrupts, so the holder must not block). RPC locks are spinlocks, 24 | and that results in awkward code in several places to move prohibited 25 | operations outside the locked regions. In particular, there is extra 26 | complexity to make sure that RPCs are not garbage-collected while these 27 | operations are occurring without a lock. 28 | 29 | * There are several other locks in Homa besides RPC locks. When multiple 30 | locks are held, they must always be acquired in a consistent order, in 31 | order to prevent deadlock. For each lock, here are the other locks that 32 | may be acquired while holding the given lock. 33 | * RPC: socket, grantable, throttle, peer->ack_lock 34 | * Socket: port_map.write_lock 35 | Any lock not listed above must be a "leaf" lock: no other lock will be 36 | acquired while holding the lock. 37 | 38 | * Homa's approach means that socket shutdown and deletion can potentially 39 | occur while operations are underway that hold RPC locks but not the socket 40 | lock. This creates several potential problems: 41 | * A socket might be deleted and its memory reclaimed while an RPC still 42 | has access to it. 
Homa assumes that Linux will prevent socket deletion 43 | while the kernel call is executing. In situations outside kernel call 44 | handling, Homa uses rcu_read_lock to prevent socket deletion. 45 | * A socket might be shut down while there are active operations on 46 | RPCs. For example, a new RPC creation might be underway when a socket 47 | is shut down, which could add the new RPC after all of its RPCs 48 | have supposedly been deleted. Handling this requires careful ordering 49 | of operations during shutdown, plus the rest of Homa must be careful 50 | never to add new RPCs to a socket that has been shut down. 51 | 52 | * There are a few places where Homa needs to process RPCs on lists 53 | associated with a socket, such as the timer. Such code must first lock 54 | the socket (to synchronize access to the link pointers) then lock 55 | individual RPCs on the list. However, this violates the rules for locking 56 | order. It isn't safe to unlock the socket before locking the RPC, because 57 | the RPC could be deleted and its memory recycled between the unlock of the 58 | socket lock and the lock of the RPC; this could result in corruption. Homa 59 | uses a few different ways to handle this situation: 60 | * Use homa_protect_rpcs to prevent RPC reaping for a socket. RPCs can still 61 | be deleted, but their memory won't go away until homa_unprotect_rpcs is 62 | invoked. This allows the socket lock to be released before acquiring 63 | the RPC lock; after acquiring the RPC lock check to see if it has been 64 | deleted; if so, skip it. Note: the Linux RCU mechanism could have been 65 | used to achieve the same effect, but it results in *very* long delays 66 | before final reclamation (tens of ms), even without contention, which 67 | means that a large number of dead RPCs could accumulate. 68 | * Use spin_trylock_bh to acquire the RPC lock, while still holding the 69 | socket lock.
If this fails, then release the socket lock, then retry 70 | both the socket lock and the RPC lock. 71 | 72 | * There are also a few places where Homa is doing something related to an 73 | RPC (such as copying message data to user space) and needs the RPC to stay 74 | around, but it isn't holding the RPC lock. In these situations, Homa sets 75 | a bit in rpc->flags and homa_rpc_reap will not reap RPCs with any of these 76 | flags set. -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile to run unit tests for Homa 2 | 3 | KDIR ?= /lib/modules/$(shell uname -r)/build 4 | CC ?= gcc 5 | CXX ?= g++ 6 | PERL ?= perl 7 | ARCH ?= x86 8 | 9 | all: test 10 | 11 | KERN_INCLUDES := \ 12 | -I$(KDIR)/arch/x86/include \ 13 | -I$(KDIR)/arch/x86/include/generated \ 14 | -I$(KDIR)/include \ 15 | -I$(KDIR)/arch/x86/include/uapi \ 16 | -I$(KDIR)/arch/x86/include/generated/uapi \ 17 | -I$(KDIR)/include/uapi \ 18 | -I$(KDIR)/include/generated/uapi 19 | CINCLUDES := \ 20 | -I. \ 21 | -I.. \ 22 | $(KERN_INCLUDES) \ 23 | -include $(KDIR)/include/linux/kconfig.h 24 | CCINCLUDES := \ 25 | -I. \ 26 | -I..
\ 27 | $(KERN_KINCLUDES) 28 | 29 | DEFS := -D__KERNEL__ \ 30 | -D__UNIT_TEST__ \ 31 | -D KBUILD_MODNAME='"homa"' 32 | 33 | WARNS := -Wall -Wundef -Wno-trigraphs -Wno-sign-compare \ 34 | -Wno-strict-aliasing -Werror 35 | CFLAGS := $(WARNS) -Wstrict-prototypes -MD -g $(CINCLUDES) $(DEFS) 36 | CCFLAGS := -std=c++11 $(WARNS) -MD -g $(CCINCLUDES) $(DEFS) -fsanitize=address 37 | 38 | TEST_SRCS := unit_homa_grant.c \ 39 | unit_homa_incoming.c \ 40 | unit_homa_offload.c \ 41 | unit_homa_metrics.c \ 42 | unit_homa_outgoing.c \ 43 | unit_homa_peer.c \ 44 | unit_homa_pool.c \ 45 | unit_homa_plumbing.c \ 46 | unit_homa_rpc.c \ 47 | unit_homa_skb.c \ 48 | unit_homa_sock.c \ 49 | unit_homa_timer.c \ 50 | unit_homa_utils.c \ 51 | unit_timetrace.c 52 | TEST_OBJS := $(patsubst %.c,%.o,$(TEST_SRCS)) 53 | 54 | HOMA_SRCS := homa_grant.c \ 55 | homa_incoming.c \ 56 | homa_metrics.c \ 57 | homa_offload.c \ 58 | homa_outgoing.c \ 59 | homa_peer.c \ 60 | homa_pool.c \ 61 | homa_plumbing.c \ 62 | homa_rpc.c \ 63 | homa_skb.c \ 64 | homa_sock.c \ 65 | homa_timer.c \ 66 | homa_utils.c \ 67 | timetrace.c 68 | HOMA_OBJS := $(patsubst %.c,%.o,$(HOMA_SRCS)) 69 | 70 | OTHER_SRCS := ccutils.cc \ 71 | main.c \ 72 | mock.c \ 73 | utils.c 74 | OTHER_OBJS := $(patsubst %.c,%.o,$(patsubst %.cc,%.o,$(OTHER_SRCS))) 75 | 76 | OBJS := $(TEST_OBJS) $(HOMA_OBJS) $(OTHER_OBJS) 77 | 78 | CLEANS = unit $(OBJS) *.d .deps 79 | 80 | # This seems to be the only way to disable the built-in implicit rules 81 | # for %:%.c and %:%.cc. 
82 | .SUFFIXES: 83 | 84 | %.o: ../%.c 85 | $(CC) -c $(CFLAGS) $< -o $@ 86 | %.e: ../%.c 87 | $(CC) -E $(CFLAGS) $< -o $@ 88 | %.o: %.c 89 | $(CC) -c $(CFLAGS) $< -o $@ 90 | %.e: %.c 91 | $(CC) -E $(CFLAGS) $< -o $@ 92 | %.o: %.cc 93 | $(CXX) -c $(CCFLAGS) $< -o $@ 94 | %.e: %.cc 95 | $(CXX) -E $(CCFLAGS) $< -o $@ 96 | 97 | unit: $(OBJS) 98 | $(CXX) $(CFLAGS) $^ -o $@ -lasan 99 | 100 | test: unit 101 | ./unit 102 | 103 | # Additional definitions for running unit tests using stripped sources. 104 | 105 | S_HOMA_SRCS := $(patsubst %,stripped/%,$(filter-out timetrace.c, $(HOMA_SRCS))) 106 | S_HOMA_OBJS := $(patsubst %.c,%.o,$(S_HOMA_SRCS)) 107 | S_HOMA_HDRS := stripped/homa.h \ 108 | stripped/homa_impl.h \ 109 | stripped/homa_peer.h \ 110 | stripped/homa_pool.h \ 111 | stripped/homa_receiver.h \ 112 | stripped/homa_rpc.h \ 113 | stripped/homa_sock.h \ 114 | stripped/homa_stub.h \ 115 | stripped/homa_wire.h 116 | stripped/%.c: ../%.c 117 | ../util/strip.py --alt $< > $@ 118 | stripped/%.h: ../%.h 119 | ../util/strip.py --alt $< > $@ 120 | S_TEST_OBJS := $(patsubst %,stripped/%,$(filter-out unit_timetrace.o, $(TEST_OBJS))) 121 | S_OBJS := $(S_HOMA_OBJS) $(S_TEST_OBJS) $(patsubst %,stripped/%,$(OTHER_OBJS)) 122 | 123 | $(S_OBJS): | stripped $(S_HOMA_HDRS) 124 | 125 | stripped: 126 | mkdir -p stripped 127 | 128 | stripped/%.o: stripped/%.c 129 | $(CC) -c $(patsubst -I..,-Istripped,$(CFLAGS)) $< -o $@ 130 | stripped/%.o: %.c 131 | $(CC) -c $(patsubst -I..,-Istripped,$(CFLAGS)) $< -o $@ 132 | stripped/%.o: %.cc 133 | $(CXX) -c $(patsubst -I..,-Istripped,$(CCFLAGS)) $< -o $@ 134 | 135 | s_unit: $(S_OBJS) 136 | $(CXX) $(CFLAGS) $^ -o $@ -lasan 137 | 138 | s_test: s_unit 139 | ./s_unit 140 | 141 | CLEANS += s_unit 142 | 143 | # The target below shouldn't be needed: theoretically, any code that is 144 | # sensitive to IPv4 vs. IPv6 should be tested explicitly, regardless of 145 | # the --ipv4 argument. 
146 | test_both: unit 147 | @echo "Testing with IPv4 default:" 148 | ./unit --ipv4 149 | @echo "Testing with IPv6 default:" 150 | ./unit 151 | 152 | clean: 153 | rm -f $(CLEANS) 154 | rm -rf stripped 155 | 156 | # This magic (along with the -MD gcc option) automatically generates makefile 157 | # dependencies for header files included from C source files we compile, 158 | # and keeps those dependencies up-to-date every time we recompile. 159 | # See 'mergedep.pl' for more information. 160 | .deps: $(wildcard *.d stripped/*.d) 161 | @mkdir -p $(@D) 162 | $(PERL) mergedep.pl $@ $^ 163 | -include .deps 164 | 165 | # The following target is useful for debugging Makefiles; it 166 | # prints the value of a make variable. 167 | print-%: 168 | @echo $* = $($*) 169 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | This directory contains unit tests for the Homa Linux kernel module. 2 | Here are a few overall notes: 3 | 4 | * These are "white box" tests, not "black box" tests. Tests are written 5 | by looking at the code and writing enough tests to make sure all of the 6 | major code elements are covered. 7 | 8 | * The structure of the unit tests is isomorphic to the structure of the 9 | code: 10 | * There is one test file in this directory for each code file. For example, 11 | `unit_homa_incoming.c` contains unit tests for `../homa_incoming.c`. 12 | * Within the test file, there is a block of tests for each function in the 13 | corresponding code file, and the test blocks occur in the same order 14 | as the functions. If you move functions around, move the tests around 15 | to maintain isomorphism. 16 | * The tests for each function are ordered according to which lines of code 17 | in the function they test. Typically, a given test will test one or a few 18 | lines of the function. 
The order of the tests matches the order of the 19 | code ranges they test. With this approach, it's easy to scan the tests 20 | for a function after you make changes to see if you need to add more 21 | tests. 22 | * Some functions will have an initial test labeled "basic" or "sanity check". 23 | These initial tests may exercise a variety of features in the function; 24 | remaining tests only need to cover things not exercised by the initial 25 | test. 26 | 27 | * The name of a test indicates what function it is testing, and also gives 28 | a very terse synopsis of what is being tested. For example, consider this 29 | test from `homa_incoming.c`: 30 | ``` 31 | TEST_F(homa_incoming, homa_add_packet__packet_overlaps_message_end) 32 | { 33 | ... 34 | } 35 | ``` 36 | The name of the test is `homa_add_packet__packet_overlaps_message_end`; 37 | the test exercises the function `homa_add_packet`, and the particular 38 | case is a new arriving packet that extends past the end of the message. 39 | 40 | * In general, tests should be disaggregated so that each test only tests a small 41 | amount of functionality. Avoid large tests that test many different things. 42 | 43 | * In writing tests, focus on the control structure. For example, there should 44 | be tests for each branch of an `if` statement. For loops, be sure to 45 | include tests that involve multiple iterations of the loop. 46 | 47 | * You don't need to individually test each side effect of a collection of 48 | straight-line statements; testing one or two of them is fine. 49 | 50 | * The file `mock.c` mocks out Linux kernel functions invoked by the code 51 | being tested. Where relevant, the mocking code may record information about 52 | how it was invoked and/or allow for the injection of errors in results. 53 | 54 | * It should be possible to exercise virtually every line of code in Homa. 
55 | If it appears that you cannot exercise a particular line, check to see 56 | whether `mock.c` has mechanisms you can use to get the desired effect. 57 | If not, consider extending `mock.c` to provide whatever you need. 58 | 59 | * Feel free to contact John Ousterhout if you're having trouble figuring out 60 | how to test a particular piece of code. 61 | -------------------------------------------------------------------------------- /test/ccutils.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /* Utility functions for unit tests, implemented in C++. */ 6 | 7 | #ifdef __cplusplus 8 | #define CEXTERN extern "C" 9 | #else 10 | #define CEXTERN extern 11 | #endif 12 | 13 | struct unit_hash; 14 | 15 | CEXTERN void unit_fill_data(unsigned char *data, int length, 16 | int first_value); 17 | CEXTERN void unit_hash_erase(struct unit_hash *hash, const void *key); 18 | CEXTERN void unit_hash_free(struct unit_hash *hash); 19 | CEXTERN void *unit_hash_get(struct unit_hash *hash, const void *key); 20 | CEXTERN struct unit_hash * 21 | unit_hash_new(void); 22 | CEXTERN void unit_hash_set(struct unit_hash *hash, const void *key, 23 | void *value); 24 | CEXTERN int unit_hash_size(struct unit_hash *hash); 25 | CEXTERN void unit_hook(char *id); 26 | CEXTERN void unit_hook_clear(void); 27 | CEXTERN void unit_hook_register(void hook_proc(char *id)); 28 | CEXTERN void unit_log_add_separator(char *sep); 29 | CEXTERN void unit_log_clear(void); 30 | CEXTERN void unit_log_data(const char *separator, unsigned char *data, 31 | int length); 32 | CEXTERN int unit_log_empty(void); 33 | CEXTERN const char *unit_log_get(void); 34 | CEXTERN void unit_log_printf(const char *separator, 35 | const char* format, ...) 
36 | __attribute__((format(printf, 2, 3))); -------------------------------------------------------------------------------- /test/main.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: BSD-2-Clause 2 | 3 | /* Main program for running Homa unit tests. */ 4 | 5 | #include "homa_impl.h" 6 | #include "kselftest_harness.h" 7 | #include "mock.h" 8 | 9 | static char *helpMessage = 10 | "This program runs unit tests written in the Linux kernel kselftest style.\n" 11 | " Usage: %s options test_name test_name ...\n" 12 | "The following options are supported:\n" 13 | " --help or -h Print this message\n" 14 | " --ipv4 Simulate IPv4 for all packets (default: use IPv6)\n" 15 | " --verbose or -v Print the names of all tests as they run (default:\n" 16 | " print only tests that fail)\n" 17 | "If one or more test_name arguments are provided, then only those tests are\n" 18 | "run; if no test names are provided, then all tests are run.\n" 19 | "\n" 20 | "Note: the tests should provide complete coverage of both IPv4 and IPv6 without\n" 21 | "using the --ipv4 argument (code that depends on IPv4 vs. 
IPv6 already has\n" 22 | "special test cases for each); --ipv4 is provided for occasional double-checking.\n"; 23 | 24 | int main(int argc, char **argv) 25 | { 26 | int verbose = 0; 27 | int i; 28 | 29 | mock_ipv6_default = true; 30 | for (i = 1; i < argc; i++) { 31 | if ((strcmp(argv[i], "-h") == 0) || 32 | (strcmp(argv[i], "--help") == 0)) { 33 | printf(helpMessage, argv[0]); 34 | return 0; 35 | } else if (strcmp(argv[i], "--ipv4") == 0) { 36 | mock_ipv6_default = false; 37 | } else if ((strcmp(argv[i], "-v") == 0) || 38 | (strcmp(argv[i], "--verbose") == 0)) { 39 | verbose = 1; 40 | } else if (argv[i][0] == '-') { 41 | printf("Unknown option %s; type '%s --help' for help\n", 42 | argv[i], argv[0]); 43 | return 1; 44 | } else 45 | break; 46 | } 47 | test_harness_run(argc-i, argv+i, verbose); 48 | } 49 | -------------------------------------------------------------------------------- /test/mergedep.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Copyright 2003 Bryan Ford 3 | # Distributed under the GNU General Public License. 4 | # 5 | # Usage: mergedep <main-depfile> [<new-depfiles> ...] 6 | # 7 | # This script merges the contents of all <new-depfiles> specified 8 | # on the command line into the single file <main-depfile>, 9 | # which may or may not previously exist. 10 | # Dependencies in the <new-depfiles> will override 11 | # any existing dependencies for the same targets in <main-depfile>. 12 | # The <new-depfiles> are deleted after <main-depfile> is updated. 13 | # 14 | # The <new-depfiles> are typically generated by GCC with the -MD option, 15 | # and the <main-depfile> is typically included from a Makefile, 16 | # as shown here for GNU 'make': 17 | # 18 | # .deps: $(wildcard *.d) 19 | # perl mergedep $@ $^ 20 | # -include .deps 21 | # 22 | # This script properly handles multiple dependencies per <new-depfile>, 23 | # including dependencies having no target, 24 | # so it is compatible with GCC3's -MP option. 
25 | # 26 | 27 | sub readdeps { 28 | my $filename = shift; 29 | 30 | open(DEPFILE, $filename) or return 0; 31 | while (<DEPFILE>) { 32 | if (/([^:]*):([^\\:]*)([\\]?)$/) { 33 | my $target = $1; 34 | my $deplines = $2; 35 | my $slash = $3; 36 | while ($slash ne '') { 37 | $_ = <DEPFILE>; 38 | defined($_) or die 39 | "Unterminated dependency in $filename"; 40 | /(^[ \t][^\\]*)([\\]?)$/ or die 41 | "Bad continuation line in $filename"; 42 | $deplines = "$deplines\\\n$1"; 43 | $slash = $2; 44 | } 45 | #print "DEPENDENCY [[$target]]: [[$deplines]]\n"; 46 | $dephash{$target} = $deplines; 47 | } elsif (/^[#]?[ \t]*$/) { 48 | # ignore blank lines and comments 49 | } else { 50 | die "Bad dependency line in $filename: $_"; 51 | } 52 | } 53 | close DEPFILE; 54 | return 1; 55 | } 56 | 57 | 58 | if ($#ARGV < 0) { 59 | print "Usage: mergedep <main-depfile> [<new-depfiles> ..]\n"; 60 | exit(1); 61 | } 62 | 63 | %dephash = (); 64 | 65 | # Read the main dependency file 66 | $maindeps = $ARGV[0]; 67 | readdeps($maindeps); 68 | 69 | # Read and merge in the new dependency files 70 | foreach $i (1 .. $#ARGV) { 71 | readdeps($ARGV[$i]) or die "Can't open $ARGV[$i]"; 72 | } 73 | 74 | # Update the main dependency file 75 | open(DEPFILE, ">$maindeps.tmp") or die "Can't open output file $maindeps.tmp"; 76 | foreach $target (keys %dephash) { 77 | print DEPFILE "$target:$dephash{$target}"; 78 | } 79 | close DEPFILE; 80 | rename("$maindeps.tmp", "$maindeps") or die "Can't overwrite $maindeps"; 81 | 82 | # Finally, delete the new dependency files 83 | foreach $i (1 .. $#ARGV) { 84 | unlink($ARGV[$i]) or print "Error removing $ARGV[$i]\n"; 85 | } 86 | 87 | -------------------------------------------------------------------------------- /test/mock.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /* Functions for mocking that are exported to test code. 
*/ 6 | 7 | extern int mock_alloc_page_errors; 8 | extern int mock_alloc_skb_errors; 9 | extern int mock_bpage_size; 10 | extern int mock_bpage_shift; 11 | extern int mock_compound_order_mask; 12 | extern int mock_copy_data_errors; 13 | extern int mock_copy_to_user_dont_copy; 14 | extern int mock_copy_to_user_errors; 15 | extern int mock_cpu_idle; 16 | extern cycles_t mock_cycles; 17 | extern int mock_import_iovec_errors; 18 | extern int mock_import_ubuf_errors; 19 | extern int mock_ip6_xmit_errors; 20 | extern int mock_ip_queue_xmit_errors; 21 | extern bool mock_ipv6; 22 | extern bool mock_ipv6_default; 23 | extern int mock_kmalloc_errors; 24 | extern int mock_kthread_create_errors; 25 | extern int mock_register_protosw_errors; 26 | extern char mock_xmit_prios[]; 27 | extern int mock_log_rcu_sched; 28 | extern int mock_max_grants; 29 | extern int mock_max_skb_frags; 30 | extern int mock_mtu; 31 | extern struct net_device 32 | mock_net_device; 33 | extern __u64 mock_ns; 34 | extern __u64 mock_ns_tick; 35 | extern int mock_numa_mask; 36 | extern int mock_page_nid_mask; 37 | extern char mock_printk_output[]; 38 | extern int mock_route_errors; 39 | extern int mock_spin_lock_held; 40 | extern struct task_struct 41 | mock_task; 42 | extern int mock_trylock_errors; 43 | extern int mock_vmalloc_errors; 44 | extern int mock_xmit_log_verbose; 45 | extern int mock_xmit_log_homa_info; 46 | 47 | struct page * 48 | mock_alloc_pages(gfp_t gfp, unsigned order); 49 | int mock_check_error(int *errorMask); 50 | void mock_clear_xmit_prios(void); 51 | void mock_data_ready(struct sock *sk); 52 | cycles_t mock_get_cycles(void); 53 | unsigned int 54 | mock_get_mtu(const struct dst_entry *dst); 55 | void mock_get_page(struct page *page); 56 | int mock_page_refs(struct page *page); 57 | int mock_page_refs(struct page *page); 58 | void mock_put_page(struct page *page); 59 | void mock_rcu_read_lock(void); 60 | void mock_rcu_read_unlock(void); 61 | struct ctl_table_header * 62 | 
mock_register_net_sysctl(struct net *net, 63 | const char *path, 64 | struct ctl_table *table); 65 | void mock_set_core(int num); 66 | void mock_set_ipv6(struct homa_sock *hsk); 67 | void mock_spin_lock(spinlock_t *lock); 68 | void mock_spin_unlock(spinlock_t *lock); 69 | int mock_skb_count(void); 70 | struct sk_buff * 71 | mock_skb_new(struct in6_addr *saddr, struct homa_common_hdr *h, 72 | int extra_bytes, int first_value); 73 | void mock_sock_destroy(struct homa_sock *hsk, 74 | struct homa_socktab *socktab); 75 | void mock_sock_init(struct homa_sock *hsk, struct homa *homa, 76 | int port); 77 | void mock_teardown(void); 78 | void *mock_vmalloc(size_t size); 79 | -------------------------------------------------------------------------------- /test/unit_homa_metrics.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: BSD-2-Clause 2 | 3 | #include "homa_impl.h" 4 | #define KSELFTEST_NOT_MAIN 1 5 | #include "kselftest_harness.h" 6 | #include "ccutils.h" 7 | #include "mock.h" 8 | #include "utils.h" 9 | 10 | FIXTURE(homa_metrics) { 11 | struct homa homa; 12 | }; 13 | FIXTURE_SETUP(homa_metrics) 14 | { 15 | homa_init(&self->homa); 16 | global_homa = &self->homa; 17 | } 18 | FIXTURE_TEARDOWN(homa_metrics) 19 | { 20 | global_homa = NULL; 21 | homa_destroy(&self->homa); 22 | unit_teardown(); 23 | } 24 | 25 | TEST_F(homa_metrics, homa_metric_append) 26 | { 27 | self->homa.metrics_length = 0; 28 | homa_metric_append(&self->homa, "x: %d, y: %d", 10, 20); 29 | EXPECT_EQ(12, self->homa.metrics_length); 30 | EXPECT_STREQ("x: 10, y: 20", self->homa.metrics); 31 | 32 | homa_metric_append(&self->homa, ", z: %d", 12345); 33 | EXPECT_EQ(22, self->homa.metrics_length); 34 | EXPECT_STREQ("x: 10, y: 20, z: 12345", self->homa.metrics); 35 | EXPECT_EQ(30, self->homa.metrics_capacity); 36 | 37 | homa_metric_append(&self->homa, ", q: %050d", 88); 38 | EXPECT_EQ(77, self->homa.metrics_length); 39 | EXPECT_STREQ("x: 10, y: 20, 
z: 12345, q: 00000000000000000000000000000000000000000000000088", 40 | self->homa.metrics); 41 | EXPECT_EQ(120, self->homa.metrics_capacity); 42 | } 43 | TEST_F(homa_metrics, homa_metrics_open) 44 | { 45 | EXPECT_EQ(0, homa_metrics_open(NULL, NULL)); 46 | EXPECT_NE(NULL, self->homa.metrics); 47 | 48 | strcpy(self->homa.metrics, "12345"); 49 | EXPECT_EQ(0, homa_metrics_open(NULL, NULL)); 50 | EXPECT_EQ(5, strlen(self->homa.metrics)); 51 | EXPECT_EQ(2, self->homa.metrics_active_opens); 52 | } 53 | TEST_F(homa_metrics, homa_metrics_read__basics) 54 | { 55 | loff_t offset = 10; 56 | char buffer[1000]; 57 | 58 | self->homa.metrics = kmalloc(100, GFP_KERNEL); 59 | self->homa.metrics_capacity = 100; 60 | strcpy(self->homa.metrics, "0123456789abcdefghijklmnop"); 61 | self->homa.metrics_length = 26; 62 | EXPECT_EQ(5, homa_metrics_read(NULL, buffer, 5, &offset)); 63 | EXPECT_SUBSTR("_copy_to_user copied 5 bytes", unit_log_get()); 64 | EXPECT_EQ(15, offset); 65 | 66 | unit_log_clear(); 67 | EXPECT_EQ(11, homa_metrics_read(NULL, buffer, 1000, &offset)); 68 | EXPECT_SUBSTR("_copy_to_user copied 11 bytes", unit_log_get()); 69 | EXPECT_EQ(26, offset); 70 | 71 | unit_log_clear(); 72 | EXPECT_EQ(0, homa_metrics_read(NULL, buffer, 1000, &offset)); 73 | EXPECT_STREQ("", unit_log_get()); 74 | EXPECT_EQ(26, offset); 75 | } 76 | TEST_F(homa_metrics, homa_metrics_read__error_copying_to_user) 77 | { 78 | loff_t offset = 10; 79 | char buffer[1000]; 80 | 81 | self->homa.metrics = kmalloc(100, GFP_KERNEL); 82 | self->homa.metrics_capacity = 100; 83 | strcpy(self->homa.metrics, "0123456789abcdefghijklmnop"); 84 | self->homa.metrics_length = 26; 85 | mock_copy_to_user_errors = 1; 86 | EXPECT_EQ(EFAULT, -homa_metrics_read(NULL, buffer, 5, &offset)); 87 | } 88 | 89 | TEST_F(homa_metrics, homa_metrics_release) 90 | { 91 | self->homa.metrics_active_opens = 2; 92 | EXPECT_EQ(0, homa_metrics_release(NULL, NULL)); 93 | EXPECT_EQ(1, self->homa.metrics_active_opens); 94 | 95 | EXPECT_EQ(0, 
homa_metrics_release(NULL, NULL)); 96 | EXPECT_EQ(0, self->homa.metrics_active_opens); 97 | } 98 | -------------------------------------------------------------------------------- /test/unit_homa_utils.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: BSD-2-Clause 2 | 3 | #include "homa_impl.h" 4 | #define KSELFTEST_NOT_MAIN 1 5 | #include "kselftest_harness.h" 6 | #include "ccutils.h" 7 | #include "mock.h" 8 | #include "utils.h" 9 | 10 | #define n(x) htons(x) 11 | #define N(x) htonl(x) 12 | 13 | FIXTURE(homa_utils) { 14 | struct homa homa; 15 | }; 16 | FIXTURE_SETUP(homa_utils) 17 | { 18 | homa_init(&self->homa); 19 | unit_log_clear(); 20 | } 21 | FIXTURE_TEARDOWN(homa_utils) 22 | { 23 | homa_destroy(&self->homa); 24 | unit_teardown(); 25 | } 26 | 27 | /** 28 | * set_cutoffs() - A convenience method to allow all of the values in 29 | * homa->unsched_cutoffs to be set concisely. 30 | * @homa: Contains the unsched_cutoffs to be modified. 
31 | * @c0: New value for homa->unsched_cutoffs[0] 32 | * @c1: New value for homa->unsched_cutoffs[1] 33 | * @c2: New value for homa->unsched_cutoffs[2] 34 | * @c3: New value for homa->unsched_cutoffs[3] 35 | * @c4: New value for homa->unsched_cutoffs[4] 36 | * @c5: New value for homa->unsched_cutoffs[5] 37 | * @c6: New value for homa->unsched_cutoffs[6] 38 | * @c7: New value for homa->unsched_cutoffs[7] 39 | */ 40 | static void set_cutoffs(struct homa *homa, int c0, int c1, int c2, 41 | int c3, int c4, int c5, int c6, int c7) 42 | { 43 | homa->unsched_cutoffs[0] = c0; 44 | homa->unsched_cutoffs[1] = c1; 45 | homa->unsched_cutoffs[2] = c2; 46 | homa->unsched_cutoffs[3] = c3; 47 | homa->unsched_cutoffs[4] = c4; 48 | homa->unsched_cutoffs[5] = c5; 49 | homa->unsched_cutoffs[6] = c6; 50 | homa->unsched_cutoffs[7] = c7; 51 | } 52 | 53 | TEST_F(homa_utils, homa_init__kmalloc_failure_for_port_map) 54 | { 55 | struct homa homa2; 56 | 57 | memset(&homa2, 0, sizeof(homa2)); 58 | mock_kmalloc_errors = 1; 59 | EXPECT_EQ(ENOMEM, -homa_init(&homa2)); 60 | EXPECT_EQ(NULL, homa2.port_map); 61 | homa_destroy(&homa2); 62 | } 63 | TEST_F(homa_utils, homa_init__kmalloc_failure_for_peers) 64 | { 65 | struct homa homa2; 66 | 67 | memset(&homa2, 0, sizeof(homa2)); 68 | mock_kmalloc_errors = 2; 69 | EXPECT_EQ(ENOMEM, -homa_init(&homa2)); 70 | EXPECT_NE(NULL, homa2.port_map); 71 | EXPECT_EQ(NULL, homa2.peers); 72 | homa_destroy(&homa2); 73 | } 74 | TEST_F(homa_utils, homa_init__homa_skb_init_failure) 75 | { 76 | struct homa homa2; 77 | 78 | memset(&homa2, 0, sizeof(homa2)); 79 | mock_kmalloc_errors = 4; 80 | EXPECT_EQ(ENOMEM, -homa_init(&homa2)); 81 | EXPECT_SUBSTR("Couldn't initialize skb management (errno 12)", 82 | mock_printk_output); 83 | homa_destroy(&homa2); 84 | } 85 | TEST_F(homa_utils, homa_init__cant_create_pacer_thread) 86 | { 87 | struct homa homa2; 88 | 89 | memset(&homa2, 0, sizeof(homa2)); 90 | mock_kthread_create_errors = 1; 91 | EXPECT_EQ(EACCES, -homa_init(&homa2)); 92 
| EXPECT_EQ(NULL, homa2.pacer_kthread); 93 | homa_destroy(&homa2); 94 | } 95 | 96 | TEST_F(homa_utils, homa_print_ipv4_addr) 97 | { 98 | struct in6_addr test_addr1 = unit_get_in_addr("192.168.0.1"); 99 | struct in6_addr test_addr2 = unit_get_in_addr("1.2.3.4"); 100 | struct in6_addr test_addr3 = unit_get_in_addr("5.6.7.8"); 101 | char *p1, *p2; 102 | int i; 103 | 104 | p1 = homa_print_ipv6_addr(&test_addr1); 105 | p2 = homa_print_ipv6_addr(&test_addr2); 106 | EXPECT_STREQ("192.168.0.1", p1); 107 | EXPECT_STREQ("1.2.3.4", p2); 108 | 109 | /* Make sure buffers eventually get reused. */ 110 | for (i = 0; i < 20; i++) 111 | homa_print_ipv6_addr(&test_addr3); 112 | EXPECT_STREQ("5.6.7.8", p1); 113 | } 114 | 115 | TEST_F(homa_utils, homa_snprintf) 116 | { 117 | char buffer[50]; 118 | int used = 0; 119 | 120 | used = homa_snprintf(buffer, sizeof32(buffer), used, 121 | "Test message with values: %d and %d", 100, 1000); 122 | EXPECT_EQ(38, used); 123 | EXPECT_STREQ("Test message with values: 100 and 1000", buffer); 124 | 125 | used = homa_snprintf(buffer, sizeof32(buffer), used, 126 | "; plus: %d", 123456); 127 | EXPECT_EQ(49, used); 128 | EXPECT_STREQ("Test message with values: 100 and 1000; plus: 123", 129 | buffer); 130 | 131 | used = homa_snprintf(buffer, sizeof32(buffer), used, 132 | "more text, none of which fits"); 133 | EXPECT_EQ(49, used); 134 | EXPECT_STREQ("Test message with values: 100 and 1000; plus: 123", 135 | buffer); 136 | } 137 | 138 | TEST_F(homa_utils, homa_prios_changed__basics) 139 | { 140 | set_cutoffs(&self->homa, 90, 80, HOMA_MAX_MESSAGE_LENGTH*2, 60, 50, 141 | 40, 30, 0); 142 | self->homa.num_priorities = 6; 143 | homa_prios_changed(&self->homa); 144 | EXPECT_EQ(0, self->homa.unsched_cutoffs[6]); 145 | EXPECT_EQ(40, self->homa.unsched_cutoffs[5]); 146 | EXPECT_EQ(60, self->homa.unsched_cutoffs[3]); 147 | EXPECT_EQ(HOMA_MAX_MESSAGE_LENGTH*2, self->homa.unsched_cutoffs[2]); 148 | EXPECT_EQ(80, self->homa.unsched_cutoffs[1]); 149 | EXPECT_EQ(INT_MAX, 
self->homa.unsched_cutoffs[0]); 150 | EXPECT_EQ(1, self->homa.max_sched_prio); 151 | EXPECT_EQ(1, self->homa.cutoff_version); 152 | } 153 | TEST_F(homa_utils, homa_prios_changed__num_priorities_too_large) 154 | { 155 | self->homa.num_priorities = 100; 156 | homa_prios_changed(&self->homa); 157 | EXPECT_EQ(8, self->homa.num_priorities); 158 | } 159 | TEST_F(homa_utils, homa_prios_changed__share_lowest_priority) 160 | { 161 | set_cutoffs(&self->homa, 90, 80, 70, 60, 50, 40, 30, 0); 162 | self->homa.num_priorities = 7; 163 | homa_prios_changed(&self->homa); 164 | EXPECT_EQ(30, self->homa.unsched_cutoffs[6]); 165 | EXPECT_EQ(80, self->homa.unsched_cutoffs[1]); 166 | EXPECT_EQ(0x7fffffff, self->homa.unsched_cutoffs[0]); 167 | EXPECT_EQ(0, self->homa.max_sched_prio); 168 | } 169 | -------------------------------------------------------------------------------- /test/utils.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | 3 | /* Utility functions for unit tests, implemented in C. */ 4 | 5 | struct homa_message_out; 6 | struct homa_rpc; 7 | struct unit_hash; 8 | 9 | /** 10 | * define UNIT_TEST_DATA_PER_PACKET - bytes of payload to use as the 11 | * default for packets sent in unit tests. 12 | */ 13 | #define UNIT_TEST_DATA_PER_PACKET 1400 14 | 15 | /** 16 | * enum unit_rpc_state - used as the @state argument to unit_client_rpc 17 | * and unit_server_rpc. 18 | * UNIT_OUTGOING - RPC state is RPC_OUTGOING, no packets have been sent. 19 | * UNIT_RCVD_ONE_PKT - RPC state is RPC_INCOMING, a single packet has 20 | * been received. 21 | * UNIT_RCVD_MSG - RPC state is RPC_INCOMING, the entire message has 22 | * been received. 23 | * UNIT_IN_SERVICE - RPC state is RPC_IN_SERVICE (only valid for 24 | * unit_server_rpc). 
25 | */ 26 | enum unit_rpc_state { 27 | UNIT_OUTGOING = 21, 28 | UNIT_RCVD_ONE_PKT = 22, 29 | UNIT_RCVD_MSG = 23, 30 | UNIT_IN_SERVICE = 24, 31 | }; 32 | 33 | extern char *unit_ack_string(struct homa_ack *ack); 34 | extern struct homa_rpc 35 | *unit_client_rpc(struct homa_sock *hsk, 36 | enum unit_rpc_state state, struct in6_addr *client_ip, 37 | struct in6_addr *server_ip, int server_port, int id, 38 | int req_length, int resp_length); 39 | extern struct in6_addr 40 | unit_get_in_addr(char *s); 41 | extern void unit_homa_destroy(struct homa *homa); 42 | extern struct iov_iter 43 | *unit_iov_iter(void *buffer, size_t length); 44 | extern int unit_list_length(struct list_head *head); 45 | extern void unit_log_active_ids(struct homa_sock *hsk); 46 | extern void unit_log_filled_skbs(struct sk_buff *skb, int verbose); 47 | extern void unit_log_frag_list(struct sk_buff *skb, int verbose); 48 | extern void unit_log_grantables(struct homa *homa); 49 | extern void unit_log_hashed_rpcs(struct homa_sock *hsk); 50 | extern void unit_log_message_out_packets( 51 | struct homa_message_out *message, int verbose); 52 | extern const char *unit_print_gaps(struct homa_rpc *rpc); 53 | extern struct homa_rpc 54 | *unit_server_rpc(struct homa_sock *hsk, 55 | enum unit_rpc_state state, struct in6_addr *server_ip, 56 | struct in6_addr *client_ip, int client_port, int id, 57 | int req_length, int resp_length); 58 | extern void unit_log_skb_list(struct sk_buff_head *packets, 59 | int verbose); 60 | extern void unit_log_throttled(struct homa *homa); 61 | extern void unit_teardown(void); 62 | 63 | /* Kludge to avoid including arpa/inet.h, which causes definition 64 | * conflicts with kernel header files. 
65 | */ 66 | extern int inet_pton(int af, const char *src, void *dst); 67 | -------------------------------------------------------------------------------- /util/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile to build various testing programs for Homa. 2 | 3 | CFLAGS := -Wall -Werror -fno-strict-aliasing -O3 -I.. 4 | 5 | BINS := buffer_client buffer_server cp_node dist_test dist_to_proto \ 6 | get_time_trace homa_prio homa_test inc_tput receive_raw scratch \ 7 | send_raw server smi test_time_trace use_memory 8 | 9 | OBJS := $(patsubst %,%.o,$(BINS)) 10 | 11 | LIB_SRCS := dist.cc homa_api.c test_utils.cc time_trace.cc 12 | LIB_OBJS := $(patsubst %.c,%.o,$(patsubst %.cc,%.o,$(LIB_SRCS))) 13 | LIB_OBJS += homa_receiver.o 14 | 15 | HDRS = ../homa_receiver.h ../homa.h dist.h time_trace.h 16 | 17 | .SECONDARY: $(OBJS) $(LIB_OBJS) 18 | 19 | all: $(BINS) 20 | 21 | cp_node: cp_node.o dist.o time_trace.o $(LIB_OBJS) 22 | g++ $(CFLAGS) $^ -lpthread -o $@ 23 | 24 | $(OBJS) $(LIB_OBJS): $(HDRS) 25 | 26 | homa_receiver.o: ../homa_receiver.cc ../homa_receiver.h 27 | g++ -c $(CFLAGS) -std=c++17 $< -o $@ 28 | 29 | # This seems to be the only way to disable the built-in implicit rules 30 | # for %:%.c and %:%.cc. 31 | .SUFFIXES: 32 | 33 | %: %.o $(LIB_OBJS) 34 | g++ $(CFLAGS) $^ -lpthread -o $@ 35 | 36 | %.o: %.cc test_utils.h ../homa.h 37 | g++ -c $(CFLAGS) -std=c++17 $< -o $@ 38 | 39 | %.o: %.c test_utils.h ../homa.h 40 | cc -c $(CFLAGS) $< -o $@ 41 | 42 | homa_api.o: ../homa_api.c ../homa.h 43 | cc -c $(CFLAGS) $< -o $@ 44 | 45 | clean: 46 | rm -f $(BINS) $(OBJS) $(LIB_OBJS) 47 | 48 | # The following target is useful for debugging Makefiles; it 49 | # prints the value of a make variable. 
50 | print-%: 51 | @echo $* = $($*) 52 | -------------------------------------------------------------------------------- /util/README.md: -------------------------------------------------------------------------------- 1 | This directory contains a collection of utilities for testing and 2 | analyzing Homa. Here is a summary of some of the programs in this 3 | directory; for more information, run any program with the "--help" 4 | option, or look at its source code. 5 | 6 | ### Cluster Performance Tests 7 | 8 | **cp_node**: a program that runs on an individual node as part of cluster 9 | benchmarks. You can run this program by hand (e.g. on one client machine 10 | and one server machine): type `cp_node --help` for basic documentation. 11 | This program is also run automatically by the other cp_* benchmarks. 12 | 13 | **cp_vs_tcp**: the primary cluster performance test. Measures slowdown 14 | as a function of message size for Homa and TCP under various workloads. 15 | 16 | **cp_basic**: measures basic latency and throughput for Homa and TCP. 17 | 18 | **cp_client_threads**: measures the throughput of a single client as a 19 | function of the number of sending threads. 20 | 21 | **cp_config**: measures Homa slowdown while varying one or more 22 | configuration parameters. 23 | 24 | **cp_load**: generates CDFs of short message latency for Homa and 25 | TCP under different network loads. 26 | 27 | **cp_mtu**: generates CDFs of short message latency for Homa and TCP 28 | while varying the maximum packet length. 29 | 30 | **cp_server_ports**: measures single-server throughput as a function 31 | of the number of receiving ports. 32 | 33 | **cp_tcp**: measures the performance of TCP by itself, with no message 34 | truncation. 35 | 36 | ### Timetracing Tools 37 | A number of programs are available for collecting, transforming, and analyzing 38 | timetraces. Most have --help options that provide documentation. The following 39 | scripts are relatively general-purpose (i.e. 
they don't have Homa dependencies): 40 | 41 | **ttgrep.py**: extracts records from a timetrace that match a pattern and 42 | recomputes the time differences using only those records. 43 | 44 | **ttmerge.py**: combines two or more timetraces into a single timetrace. 45 | 46 | **ttoffset.py**: offsets all of the times in a timetrace by a given amount (usually 47 | done to line up times in one trace with times in another). 48 | 49 | **ttrange.py**: extracts timetrace entries from a given time range. 50 | 51 | **ttsum.py**: outputs statistics from a timetrace on the delay preceding each 52 | event. Can also produce a timeline for repeated operations such as processing 53 | a request on a server. 54 | 55 | The following scripts are Homa-specific: 56 | 57 | **ttprint.py**: extracts the most recent timetrace from the kernel and 58 | prints it to standard output. 59 | 60 | **ttsync.py**: analyzes Homa-specific information in a collection of 61 | timetraces simultaneously on different nodes and rewrites the traces to 62 | synchronize their clocks. 63 | 64 | **tthoma.py**: this is the primary script for analyzing Homa data. It 65 | contains multiple analyzers that extract different kinds of data from a 66 | collection of timetraces. Invoke with --help for full documentation. 67 | 68 | ### Other Useful Tools 69 | 70 | **diff_rtts.py**: compares two .rtts files collected by the cperf benchmarks, 71 | tries to identify how/why they are different. -------------------------------------------------------------------------------- /util/avg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Reads lines and extracts the first floating-point number to appear on 5 | each line; prints both the individual values and the average of them. 
6 | Usage: avg.py [file] 7 | """ 8 | 9 | from __future__ import division, print_function 10 | from glob import glob 11 | from optparse import OptionParser 12 | import math 13 | import os 14 | import re 15 | import string 16 | import sys 17 | 18 | if len(sys.argv) == 2: 19 | f = open(sys.argv[1]) 20 | elif len(sys.argv) == 1: 21 | f = sys.stdin 22 | else: 23 | print("Usage: %s [tt_file]" % (sys.argv[0])) 24 | sys.exit(1) 25 | 26 | values = [] 27 | 28 | for line in f: 29 | match = re.match('.*?[^0-9]([0-9]+[.][0-9]+)', line) 30 | if match: 31 | print('Found field %s' % (match.group(1))) 32 | values.append(float(match.group(1))) 33 | else: 34 | print('Line didn\'t match: %s' % (line)) 35 | 36 | if len(values): 37 | print('Average: %.3f' % (sum(values)/len(values))) 38 | else: 39 | print('No lines matched') -------------------------------------------------------------------------------- /util/buffer_client.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /* This is a test program used together with buffer_server.c to learn about 6 | * how TCP handles buffer exhaustion. This program opens an infinite series 7 | * of sockets to a single port and writes as much data to each socket as 8 | * it can before the socket backs up (it assumes that the server application 9 | * is not reading any of the data). Once each socket backs up, it goes on 10 | * to the next socket.
/* Usage:
 *     buffer_client hostName port
 */

#include <errno.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

/* Number of bytes handed to each send() call. */
#define BUFFER_SIZE 4096

/*
 * main() - Open TCP connections to hostName:port one after another and
 * fill each one with data until send() stops accepting more.
 *
 * argv[1] is the server's host name and argv[2] its port number. For each
 * connection, one line is printed saying how many bytes were accepted
 * before the socket stopped taking data. Sockets that connected
 * successfully are deliberately left open: the experiment needs the
 * unread data to remain queued.
 *
 * The outer loop never terminates; the process exits with status 1 on a
 * usage, lookup, or socket-creation error.
 */
int main(int argc, char** argv) {
	int fd, status, port;
	struct addrinfo *result;
	struct addrinfo hints;
	char *host;
	char *end;
	long parsed;
	char buffer[BUFFER_SIZE];
	int bytesSent;

	if (argc < 3) {
		printf("Usage: %s hostName port\n", argv[0]);
		exit(1);
	}
	host = argv[1];

	/* Reject empty strings, trailing junk, and values outside the
	 * valid TCP port range, not just a result of 0.
	 */
	errno = 0;
	parsed = strtol(argv[2], &end, 10);
	if (end == argv[2] || *end != '\0' || errno == ERANGE
			|| parsed <= 0 || parsed > 65535) {
		printf("Bad port number %s; must be integer\n",
				argv[2]);
		exit(1);
	}
	port = (int) parsed;

	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_family = AF_INET;
	hints.ai_socktype = SOCK_STREAM;
	status = getaddrinfo(host, argv[2], &hints, &result);
	if (status != 0) {
		printf("Couldn't look up address for %s: %s\n",
				host, gai_strerror(status));
		exit(1);
	}

	/* The payload is irrelevant, but it must not be uninitialized
	 * stack memory: that would be an indeterminate read and would
	 * send stale stack contents over the network.
	 */
	memset(buffer, 0, sizeof(buffer));

	while (1) {
		fd = socket(PF_INET, SOCK_STREAM, 0);
		if (fd < 0) {
			printf("Couldn't create socket: %s\n", strerror(errno));
			exit(1);
		}
		status = connect(fd, result->ai_addr, result->ai_addrlen);
		if (status < 0) {
			/* close() may overwrite errno, so capture the
			 * connect() error before releasing the socket.
			 */
			int connect_errno = errno;

			close(fd);
			printf("Couldn't connect to %s:%d: %s\n", host, port,
					strerror(connect_errno));
			sleep(5);
			continue;
		}

		/* Non-blocking sends: keep writing until the kernel refuses
		 * to take any more data for this connection.
		 */
		bytesSent = 0;
		while (1) {
			status = send(fd, buffer, BUFFER_SIZE,
					MSG_NOSIGNAL|MSG_DONTWAIT);
			if (status > 0) {
				bytesSent += status;
				continue;
			}
			if (status == 0) {
				printf("Fd %d got 0 status after sending %d bytes\n",
						fd, bytesSent);
			} else if (errno == EAGAIN || errno == EWOULDBLOCK) {
				printf("Fd %d blocked after sending %d bytes\n",
						fd, bytesSent);
			} else {
				printf("Fd %d failed after sending %d "
						"bytes: %s (%d)\n",
						fd, bytesSent, strerror(errno),
						errno);
			}
			break;
		}
	}
	/* Not reached: the loop above has no exit. */
	exit(0);
}

-------------------------------------------------------------------------------- /util/buffer_server.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /* This is a test program used together with buffer_client.c to learn about 6 | * how TCP handles buffer exhaustion. This program opens accepts connections 7 | * on a given port, but it never reads any incoming data, so buffers will 8 | * pile up in the kernel. 9 | * 10 | * Usage: 11 | * buffer_server port 12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "homa.h" 24 | #include "test_utils.h" 25 | 26 | int main(int argc, char** argv) { 27 | int fd, port; 28 | int optval = 1; 29 | union sockaddr_in_union bindAddress; 30 | 31 | if (argc < 2) { 32 | printf("Usage: %s port\n", argv[0]); 33 | exit(1); 34 | } 35 | port = strtol(argv[1], NULL, 10); 36 | if (port == 0) { 37 | printf("Bad port number %s; must be integer\n", 38 | argv[1]); 39 | exit(1); 40 | } 41 | 42 | fd = socket(PF_INET, SOCK_STREAM, 0); 43 | if (fd < 0) { 44 | printf("Couldn't create socket: %s\n", strerror(errno)); 45 | exit(1); 46 | } 47 | if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval, 48 | sizeof(optval)) != 0) { 49 | printf("Couldn't set SO_REUSEADDR: %s\n", strerror(errno)); 50 | exit(1); 51 | } 52 | bindAddress.in4.sin_family = AF_INET; 53 | bindAddress.in4.sin_port = htons(port); 54 | bindAddress.in4.sin_addr.s_addr = htonl(INADDR_ANY); 55 | if (bind(fd, &bindAddress.sa, sizeof(bindAddress.in4)) 56 | != 0) { 57 | printf("Couldn't bind to port %d\n: %s\n", port, strerror(errno)); 58 | exit(1); 59 | } 60 | if (listen(fd, 1000000) != 0) { 61 | printf("Listen failed on socket: %s\n", strerror(errno)); 62 | exit(1); 63 | } 64 | 65 | while (1) { 66 | int peerFd; 67 | peerFd = accept(fd, NULL, NULL); 68 | if (peerFd < 
0) { 69 | printf("Accept failed: %s\n", strerror(errno)); 70 | } else { 71 | printf("Accepted connection on fd %d\n", peerFd); 72 | } 73 | } 74 | exit(0); 75 | } 76 | 77 | -------------------------------------------------------------------------------- /util/cp_both: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2024 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This cperf benchmark runs both TCP and Homa on each client and server 7 | # node in order to measure interference between the protocols. 8 | # Type "cp_both --help" for documentation. 9 | 10 | from cperf import * 11 | 12 | for option in ['client_max', 'client_ports', 'port_threads', 'server_ports', 13 | 'tcp_client_ports', 'tcp_server_ports']: 14 | default_defaults[option] = (default_defaults[option]+1)/2 15 | parser = get_parser(description= 16 | 'Measures slowdown when TCP and Homa are competing for resources ' 17 | 'on the same nodes.', 18 | usage='%(prog)s [options]', defaults={'homa_gbps': 0}) 19 | parser.add_argument('--homa-gbps', type=float, dest='homa_gbps', 20 | metavar='B', default=None, 21 | help='Configure Homa to generate B Gbps of total outgoing bandwidth ' 22 | 'on each node (clients and servers combined); the remainder of ' 23 | '--gbps will be generated by TCP (default: split --gbps between ' 24 | 'Homa and TCP)') 25 | default_defaults['client_max'] 26 | options = parser.parse_args() 27 | init(options) 28 | 29 | # First, run the experiment 30 | if not options.plot_only: 31 | homa_options = copy.deepcopy(options) 32 | homa_options.name = "homa_" + options.workload 33 | homa_options.protocol = "homa" 34 | 35 | tcp_options = copy.deepcopy(options) 36 | tcp_options.name = "tcp_" + options.workload 37 | tcp_options.protocol = "tcp" 38 | 39 | if options.homa_gbps == None: 40 | options.homa_gbps = options.gbps/2.0 41 | tcp_options.gbps = (options.gbps - options.homa_gbps)/2 42 | if 
tcp_options.gbps < 0: 43 | tcp_options.gbps = 0 44 | homa_options.gbps = options.gbps/2 - tcp_options.gbps 45 | try: 46 | run_experiments(homa_options, tcp_options) 47 | except Exception as e: 48 | log(traceback.format_exc()) 49 | log("Stopping nodes") 50 | stop_nodes() 51 | scan_logs() 52 | 53 | # Generate plots and reports 54 | homa_exp = "homa_" + options.workload 55 | scan_metrics(homa_exp) 56 | tcp_exp = "tcp_" + options.workload 57 | scan_metrics(tcp_exp) 58 | 59 | # Generate slowdown plot. 60 | log("Generating slowdown plot for %s" % (options.workload)) 61 | title = "TCP (%.1f Gbps) and Homa (%.1f Gbps) together, %s %d nodes" % ( 62 | options.gbps - options.homa_gbps, options.homa_gbps, 63 | options.workload.capitalize(), options.num_nodes) 64 | ax = start_slowdown_plot(title, 1000, homa_exp) 65 | plot_slowdown(ax, tcp_exp, "p99", "TCP P99", color=tcp_color) 66 | plot_slowdown(ax, tcp_exp, "p50", "TCP P50", color=tcp_color2) 67 | plot_slowdown(ax, homa_exp, "p99", "Homa P99", color=homa_color) 68 | plot_slowdown(ax, homa_exp, "p50", "Homa P50", color=homa_color2) 69 | ax.legend(loc="upper right", prop={'size': 9}) 70 | plt.tight_layout() 71 | plt.savefig("%s/reports/both_%s.pdf" % (options.log_dir, options.workload)) 72 | 73 | # Generate CDF of small-message RTTs. 
74 | log("Generating short message CDF for %s" % (options.workload)) 75 | homa_x, homa_y = get_short_cdf(homa_exp) 76 | tcp_x, tcp_y = get_short_cdf(tcp_exp) 77 | start_cdf_plot(title, 10, 0.99e05, 1e-05, "RTT (usecs)", 78 | "Cumulative Fraction Short Messages") 79 | plt.plot(tcp_x, tcp_y, label="TCP", color=tcp_color) 80 | plt.plot(homa_x, homa_y, label="Homa", color=homa_color) 81 | plt.legend(loc="upper right", prop={'size': 9}) 82 | plt.savefig("%s/reports/short_cdf_%s.pdf" % (options.log_dir, 83 | options.workload)) 84 | -------------------------------------------------------------------------------- /util/cp_client_threads: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2020-2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This cperf benchmark measures the throughput of a single client as a 7 | # function of the number of sending threads 8 | 9 | from cperf import * 10 | 11 | parser = get_parser(description= 12 | 'Measures throughput of a single client as a function of the number ' 13 | ' of sending threads.', 14 | usage='%(prog)s [options]', 15 | defaults={ 16 | "server_ports": 6, 17 | "port_threads": 3}) 18 | options = parser.parse_args() 19 | options.no_rtt_files = True 20 | init(options) 21 | if options.num_nodes < 2: 22 | print("--num_nodes too small (%d): must be at least 2" 23 | % (options.num_nodes)) 24 | sys.exit(-1) 25 | dir = "%s/reports" % (options.log_dir) 26 | if not os.path.exists(dir): 27 | os.makedirs(dir) 28 | 29 | options.server_nodes = options.num_nodes - 1 30 | options.first_server = 1 31 | options.port_receivers = 1 32 | options.no_rtt_files = True 33 | options.gbps = 0.0 34 | workloads = ["w1", "w2", "w3", "w4", "w5"] 35 | port_range = range(1,11) 36 | 37 | # Run the experiments, if desired 38 | if not options.plot_only: 39 | start_servers(range(1, options.num_nodes), options) 40 | for workload in workloads: 41 | for ports in 
port_range: 42 | exp = "%s_%s_%d" % (options.protocol, workload, ports) 43 | options.workload = workload 44 | options.client_ports = ports 45 | options.tcp_client_ports = ports 46 | run_experiment(exp, range(0,1), options) 47 | log("Stopping nodes") 48 | stop_nodes() 49 | 50 | # Parse the log files to extract useful data 51 | experiments = {} 52 | scan_log(options.log_dir + "/node0.log", "node0", experiments) 53 | 54 | # Keys are workload names, values are lists of throughputs for each 55 | # number of ports 56 | tput = {} 57 | 58 | for workload in workloads: 59 | tput[workload] = [] 60 | for ports in port_range: 61 | exp = "%s_%s_%d" % (options.protocol, workload, ports) 62 | node = experiments[exp]["node0"] 63 | readings = node["client_kops"] 64 | if len(readings) == 0: 65 | raise Error("No client RPC throughput found for experiment %s" 66 | % (exp)) 67 | tput[workload].append(sum(readings)/len(readings)) 68 | 69 | # print(tput) 70 | fig, (ax1, ax2) = plt.subplots(2, figsize=[4, 5]) 71 | fig.suptitle("%s Single-Client Throughput" % (options.protocol.capitalize()), 72 | y=0.95) 73 | plt.rcParams.update({'font.size': 10}) 74 | ax1.set_ylim(0, 2000) 75 | ax2.set_ylim(0, 60) 76 | for axis in [ax1, ax2]: 77 | axis.get_xaxis().set_tick_params(direction='in') 78 | axis.get_yaxis().set_tick_params(direction='in') 79 | axis.set_xlim(0, port_range[-1]) 80 | top = axis.twiny() 81 | top.set_xlim(0, port_range[-1]) 82 | top.set_xticklabels([]) 83 | top.get_xaxis().set_tick_params(direction='in') 84 | 85 | axis.set_ylabel("Kops/second") 86 | right = axis.twinx() 87 | right.set_ylim(0, axis.get_ylim()[1]) 88 | right.set_yticklabels([]) 89 | right.get_yaxis().set_tick_params(direction='in') 90 | ax1.grid(axis='y', which='major', linestyle='dotted') 91 | ax2.set_xlabel("Sending threads") 92 | ax2.grid(axis='y', which='major', linestyle='dotted') 93 | colors = ['#9467bd', '#d62728', '#2ca02c', '#ff7f0e', '#1f77b4'] 94 | for workload in workloads: 95 | if (workload == "w4") or 
(workload == "w5"): 96 | ax2.plot(port_range, tput[workload], label=workload, color=colors.pop()) 97 | else: 98 | ax1.plot(port_range, tput[workload], label=workload, color=colors.pop()) 99 | 100 | ax1.legend(loc="upper left", prop={'size': 9}) 101 | ax2.legend(loc="upper left", prop={'size': 9}) 102 | # plt.tight_layout() 103 | plt.savefig("%s/reports/%s_client_tput.pdf" % (options.log_dir, options.protocol)) -------------------------------------------------------------------------------- /util/cp_mtu: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2020-2022 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This cperf benchmark generates CDFs of short-message latency for Homa 7 | # and TCP under different values for MTU (maximum packet size). 8 | # Type "cp_vs_mtu --help" for documentation. 9 | 10 | from cperf import * 11 | 12 | parser = get_parser(description= 13 | 'Generates small-message latency CDFs as a function of MTU for ' 14 | 'Homa and TCP.', 15 | usage='%(prog)s [options]'}) 16 | parser.add_argument('-w', '--workload', dest='workload', 17 | metavar='W', required = True, 18 | help='Workload to use for benchmark: w1-w5 or number') 19 | options = parser.parse_args() 20 | init(options) 21 | options.gbps = options.gbps/2.0 22 | servers = range(0, options.num_nodes) 23 | clients = range(0, options.num_nodes) 24 | mtus = [1500, 3000, 6000, 9000] 25 | 26 | # Run the experiments 27 | if not options.plot_only: 28 | try: 29 | do_ssh(["config", "mtu", "1500"], range(0, options.num_nodes)) 30 | options.protocol = "homa" 31 | start_servers(servers, options) 32 | 33 | o = copy.deepcopy(options) 34 | o.gbps = 0.0 35 | o.client_ports = 1 36 | o.client_max = 1 37 | o.server_ports = 1 38 | o.server_nodes = 1 39 | o.first_server = 1 40 | o.unloaded = 500 41 | run_experiment("unloaded_%s" % (options.workload), range(0, 1), o) 42 | 43 | for mtu in mtus: 44 | do_ssh(["config", 
"mtu", str(mtu)], range(0, options.num_nodes)) 45 | start_servers(servers, options) 46 | run_experiment("homa_%s_mtu%d" % (options.workload, mtu), 47 | clients, options) 48 | 49 | options.protocol = "tcp" 50 | for mtu in mtus: 51 | do_ssh(["config", "mtu", str(mtu)], range(0, options.num_nodes)) 52 | start_servers(servers, options) 53 | run_experiment("tcp_%s_mtu%d" % (options.workload, mtu), 54 | clients, options) 55 | 56 | do_ssh(["config", "mtu", "1500"], range(0, options.num_nodes)) 57 | except Exception as e: 58 | log(traceback.format_exc()) 59 | 60 | log("Stopping nodes") 61 | stop_nodes() 62 | scan_logs() 63 | 64 | # Generate plots and reports 65 | unloaded_exp = "unloaded_%s" % (options.workload) 66 | set_unloaded(unloaded_exp) 67 | 68 | # Generate CDF of small-message RTTs. 69 | log("Generating short message CDFs") 70 | title = "%s %d nodes" % (options.workload.capitalize(), options.num_nodes) 71 | start_cdf_plot(title, 10, 0.99e05, 1e-05, "RTT (usecs)", 72 | "Cumulative Fraction of Short Messages") 73 | 74 | set_unloaded("unloaded_%s" % (options.workload)) 75 | styles = [(0, (1, 1)), (0, (2, 2)), (0, (5, 2)), "solid"] 76 | for mtu in mtus: 77 | x, y = get_short_cdf("tcp_%s_mtu%d" % (options.workload, mtu)) 78 | plt.plot(x, y, label="TCP MTU %d" % (mtu), 79 | color=tcp_color, linestyle=styles.pop()) 80 | styles = [(0, (1, 1)), (0, (2, 2)), (0, (5, 2)), "solid"] 81 | for mtu in mtus: 82 | x, y = get_short_cdf("homa_%s_mtu%d" % (options.workload, mtu)) 83 | plt.plot(x, y, label="Homa MTU %d" % (mtu), 84 | color=homa_color, linestyle=styles.pop()) 85 | x, y = get_short_cdf(unloaded_exp) 86 | plt.plot(x, y, label="Homa best case", color=unloaded_color) 87 | 88 | plt.legend(loc="upper right", prop={'size': 9}) 89 | plt.savefig("%s/reports/mtu_cdfs_%s.pdf" % (options.log_dir, options.workload)) 90 | 91 | # print(plt.rcParams['axes.prop_cycle'].by_key()['color']) -------------------------------------------------------------------------------- 
/util/cp_server_ports: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2020-2022 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This cperf benchmark measures the throughput of a single server as a 7 | # function of the number of receiving ports 8 | 9 | from cperf import * 10 | 11 | parser = get_parser(description= 12 | 'Measures throughput of a single server as a function of the number ' 13 | ' of receiving ports.', 14 | usage='%(prog)s [options]', 15 | defaults={ 16 | "server_ports": 6, 17 | "port_threads": 3}) 18 | options = parser.parse_args() 19 | options.no_rtt_files = True 20 | init(options) 21 | if options.num_nodes < 2: 22 | print("--num_nodes too small (%d): must be at least 2" 23 | % (options.num_nodes)) 24 | sys.exit(-1) 25 | dir = "%s/reports" % (options.log_dir) 26 | if not os.path.exists(dir): 27 | os.makedirs(dir) 28 | 29 | options.server_nodes = 1 30 | options.first_server = 0 31 | options.client_ports = 5 32 | options.tcp_client_ports = 5 33 | options.port_receivers = 2 34 | options.no_rtt_files = True 35 | options.gbps = 0.0 36 | # workloads = ["w1", "w2", "w3", "w4", "w5"] 37 | workloads = ["w2", "w4"] 38 | if options.protocol == "homa": 39 | port_range = range(1, (20//options.port_threads) + 1) 40 | else: 41 | port_range = range(2, 21, 2) 42 | 43 | print("port_range: %s" % (port_range)) 44 | 45 | # Run the experiments, if desired 46 | if not options.plot_only: 47 | for ports in port_range: 48 | options.server_ports = ports 49 | options.tcp_server_ports = ports 50 | start_servers(range(0,1), options) 51 | for workload in workloads: 52 | exp = "%s_%s_%d" % (options.protocol, workload, ports) 53 | options.workload = workload 54 | run_experiment(exp, range(1, options.num_nodes), options) 55 | log("Stopping nodes") 56 | stop_nodes() 57 | 58 | # Parse the log files to extract useful data 59 | experiments = {} 60 | scan_log(options.log_dir + 
"/node0.log", "node0", experiments) 61 | 62 | # Keys are workload names, values are lists of throughputs for each 63 | # number of ports 64 | tput = {} 65 | 66 | for workload in workloads: 67 | tput[workload] = [] 68 | for ports in port_range: 69 | exp = "%s_%s_%d" % (options.protocol, workload, ports) 70 | print("Experiment %s: %s" % (exp, experiments[exp])) 71 | node = experiments[exp]["node0"] 72 | readings = node["server_kops"] 73 | if len(readings) == 0: 74 | raise Error("No RPC throughput found for experiment %s" 75 | % (exp)) 76 | tput[workload].append(sum(readings)/len(readings)) 77 | 78 | fig, (ax1, ax2) = plt.subplots(2, figsize=[4, 5]) 79 | fig.suptitle("%s Single-Server Throughput" % (options.protocol.capitalize()), 80 | y=0.95) 81 | plt.rcParams.update({'font.size': 10}) 82 | ax1.set_ylim(0, 2000) 83 | ax2.set_ylim(0, 60) 84 | for axis in [ax1, ax2]: 85 | axis.get_xaxis().set_tick_params(direction='in') 86 | axis.get_yaxis().set_tick_params(direction='in') 87 | axis.set_xlim(0, port_range[-1]) 88 | top = axis.twiny() 89 | top.set_xlim(0, port_range[-1]) 90 | top.set_xticklabels([]) 91 | top.get_xaxis().set_tick_params(direction='in') 92 | 93 | axis.set_ylabel("Kops/second") 94 | right = axis.twinx() 95 | right.set_ylim(0, axis.get_ylim()[1]) 96 | right.set_yticklabels([]) 97 | right.get_yaxis().set_tick_params(direction='in') 98 | ax1.grid(axis='y', which='major', linestyle='dotted') 99 | ax2.set_xlabel("Receiving ports") 100 | ax2.grid(axis='y', which='major', linestyle='dotted') 101 | colors = ['#9467bd', '#d62728', '#2ca02c', '#ff7f0e', '#1f77b4'] 102 | for workload in workloads: 103 | if (workload == "w4") or (workload == "w5"): 104 | ax2.plot(port_range, tput[workload], label=workload, color=colors.pop()) 105 | else: 106 | ax1.plot(port_range, tput[workload], label=workload, color=colors.pop()) 107 | 108 | ax1.legend(loc="upper left", prop={'size': 9}) 109 | ax2.legend(loc="upper left", prop={'size': 9}) 110 | # plt.tight_layout() 111 | 
plt.savefig("%s/reports/%s_server_tput.pdf" % (options.log_dir, options.protocol)) -------------------------------------------------------------------------------- /util/cp_tcp: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2020-2022 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This cperf benchmark measures the performance of TCP by itself, with 7 | # no message truncation. 8 | # Type "cp_tcp --help" for documentation. 9 | 10 | from cperf import * 11 | 12 | parser = get_parser(description= 13 | 'Measures slowdown as a function of message size for TCP or DCTCP.', 14 | usage='%(prog)s [options]') 15 | parser.add_argument('--dctcp', dest='dctcp', type=boolean, 16 | default=False, help="Boolean value:: indicates whether measurements " 17 | "should be run on DCTCP (default: false)") 18 | options = parser.parse_args() 19 | # options.no_trunc = True 20 | init(options) 21 | servers = range(0, options.num_nodes) 22 | clients = range(0, options.num_nodes) 23 | 24 | if options.workload != "": 25 | load_info = [[options.workload, options.gbps]]; 26 | 27 | if options.dctcp: 28 | prot = "dctcp" 29 | label = "DCTCP" 30 | color = dctcp_color 31 | else: 32 | prot = "tcp" 33 | label = "DCTCP" 34 | color = tcp_color 35 | 36 | # First, run all of the experiments 37 | if not options.plot_only: 38 | congestion = get_sysctl_parameter("net.ipv4.tcp_congestion_control") 39 | if options.dctcp: 40 | set_sysctl_parameter("net.ipv4.tcp_congestion_control", 41 | "dctcp", range(0, options.num_nodes)) 42 | else: 43 | set_sysctl_parameter("net.ipv4.tcp_congestion_control", 44 | "cubic", range(0, options.num_nodes)) 45 | for workload, bw in load_info: 46 | options.workload = workload 47 | options.gbps = bw/2.0 48 | unloaded_exp = "unloaded_" + workload; 49 | exp = "%s_%s" % (prot, workload); 50 | try: 51 | options.protocol = "tcp" 52 | start_servers(servers, options) 53 | 54 | o = 
copy.deepcopy(options); 55 | o.tcp_client_ports = 1 56 | o.client_max = 1 57 | o.tcp_server_ports = 1 58 | o.server_nodes = 1 59 | o.first_server = 1 60 | o.unloaded = 500 61 | run_experiment(unloaded_exp, range(0, 1), o) 62 | run_experiment(exp, clients, options) 63 | do_cmd("tt print cp.tt", clients) 64 | except Exception as e: 65 | log(traceback.format_exc()) 66 | 67 | set_sysctl_parameter("net.ipv4.tcp_congestion_control", congestion, 68 | range(0, options.num_nodes)) 69 | log("Stopping nodes") 70 | stop_nodes() 71 | scan_logs() 72 | 73 | # Generate plots and reports 74 | for workload, bw in load_info: 75 | unloaded_exp = "unloaded_" + workload; 76 | exp = "%s_%s" % (prot, workload); 77 | 78 | set_unloaded(unloaded_exp) 79 | 80 | # Generate slowdown plot. 81 | log("Generating slowdown plot for %s" % (workload)) 82 | title = "%s %d nodes, %.1f Gbps" % (workload.capitalize(), 83 | options.num_nodes, bw) 84 | ax = start_slowdown_plot(title, 1000, exp) 85 | plot_slowdown(ax, exp, "p99", "%s P99" % (prot)) 86 | plot_slowdown(ax, exp, "p50", "%s P50" % (prot)) 87 | ax.legend() 88 | plt.tight_layout() 89 | plt.savefig("%s/reports/%s_%s.pdf" % (options.log_dir, prot, workload)) 90 | 91 | # Generate CDF of small-message RTTs. 92 | log("Generating short message CDF for %s" % (workload)) 93 | x, y = get_short_cdf(exp) 94 | start_cdf_plot(title, 10, 0.99e05, 1e-05, "RTT (usecs)", 95 | "Cumulative Fraction Short Messages") 96 | plt.plot(x, y, label=label, color=color) 97 | plt.legend(loc="upper right", prop={'size': 9}) 98 | plt.savefig("%s/reports/short_cdf_%s.pdf" % (options.log_dir, workload)) 99 | -------------------------------------------------------------------------------- /util/cp_vs_tcp: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2020-2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This cperf benchmark compares the performance of Homa with TCP. 
7 | # Type "cp_vs_tcp --help" for documentation. 8 | 9 | from cperf import * 10 | 11 | # Workloads, bandwidths, and running times to use by default. 12 | load_info = [["w2", 3.2, 5], ["w3", 14, 10], ["w4", 20, 20], ["w5", 20, 30]] 13 | 14 | parser = get_parser(description= 15 | 'Measures slowdown as a function of message size for Homa and TCP.', 16 | usage='%(prog)s [options]') 17 | parser.add_argument('--tcp', dest='tcp', type=boolean, 18 | default=True, help="Boolean value: indicates whether measurements " 19 | "should be run on TCP (default: true)") 20 | parser.add_argument('--dctcp', dest='dctcp', type=boolean, 21 | default=False, help="Boolean value:: indicates whether measurements " 22 | "should be run on DCTCP (default: false)") 23 | parser.add_argument('--servers', dest='num_servers', type=int, metavar='count', 24 | default=0, help="If nonzero, clients and servers will be segregated " 25 | "and this value indicates the number of server nodes; all other " 26 | "nodes will be clients. 
If 0, each node runs both a client and a " 27 | "server (default: 0)") 28 | parser.add_argument('--skip-unloaded', dest='skip_unloaded', type=boolean, 29 | default=True, help="Boolean value:: true means don't measure" 30 | "Homa latency under low load (default: true)") 31 | options = parser.parse_args() 32 | init(options) 33 | 34 | bw_multiplier = 0.5 35 | if options.num_servers > 0: 36 | if options.num_servers >= options.num_nodes: 37 | raise Error("Illegal value %d for --servers option; must be less " 38 | "than --nodes (%d)" % (options.num_servers, 39 | options.num_nodes)) 40 | options.servers = options.nodes[0:options.num_servers] 41 | options.clients = options.nodes[options.num_servers:len(options.nodes)] 42 | options.server_ports = options.server_ports * 2 43 | options.client_ports = options.client_ports * 2 44 | options.tcp_server_ports = options.tcp_server_ports * 2 45 | options.tcp_client_ports = options.tcp_client_ports * 2 46 | bw_multiplier = min(len(options.servers), len(options.clients)) \ 47 | / len(options.clients) 48 | 49 | if options.workload != "": 50 | load_info = [[options.workload, options.gbps, options.seconds]] 51 | 52 | # First, run all of the experiments 53 | if not options.plot_only: 54 | congestion = get_sysctl_parameter("net.ipv4.tcp_congestion_control") 55 | for workload, bw, seconds in load_info: 56 | options.workload = workload 57 | options.gbps = bw * bw_multiplier 58 | options.seconds = seconds 59 | unloaded_exp = "unloaded_" + workload 60 | homa_exp = "homa_" + workload 61 | tcp_exp = "tcp_" + workload 62 | dctcp_exp = "dctcp_" + workload 63 | try: 64 | options.protocol = "homa" 65 | 66 | if not options.skip_unloaded: 67 | start_servers(unloaded_exp, options.nodes[1:2], options) 68 | o = copy.deepcopy(options) 69 | o.gbps = 0.0 70 | o.client_ports = 1 71 | o.client_max = 1 72 | o.server_ports = 1 73 | o.unloaded = 500 74 | run_experiment(unloaded_exp, options.nodes[0:1], o) 75 | 76 | start_servers(homa_exp, options.servers, 
options) 77 | run_experiment(homa_exp, options.clients, options) 78 | 79 | if options.tcp: 80 | options.protocol = "tcp" 81 | set_sysctl_parameter("net.ipv4.tcp_congestion_control", 82 | "cubic", range(0, options.num_nodes)) 83 | start_servers(tcp_exp, options.servers, options) 84 | run_experiment(tcp_exp, options.clients, options) 85 | 86 | if options.dctcp: 87 | options.protocol = "tcp" 88 | set_sysctl_parameter("net.ipv4.tcp_congestion_control", 89 | "dctcp", range(0, options.num_nodes)) 90 | start_servers(tcp_exp, options.servers, options) 91 | run_experiment(dctcp_exp, options.clients, options) 92 | except Exception as e: 93 | log(traceback.format_exc()) 94 | 95 | if options.tcp or options.dctcp: 96 | print("Resetting TCP congestion control to %s" % (congestion)) 97 | set_sysctl_parameter("net.ipv4.tcp_congestion_control", congestion, 98 | range(0, options.num_nodes)) 99 | log("Stopping nodes") 100 | stop_nodes() 101 | scan_logs() 102 | 103 | # Generate plots and reports 104 | for workload, bw, seconds in load_info: 105 | unloaded_exp = "unloaded_" + workload 106 | homa_exp = "homa_" + workload 107 | tcp_exp = "tcp_" + workload 108 | dctcp_exp = "dctcp_" + workload 109 | scan_metrics(homa_exp) 110 | 111 | if not options.skip_unloaded: 112 | set_unloaded(unloaded_exp) 113 | 114 | # Generate slowdown plot. 
115 | log("Generating slowdown plot for %s" % (workload)) 116 | title = "%s %d nodes, %.1f Gbps" % (workload.capitalize(), 117 | options.num_nodes, bw) 118 | ax = start_slowdown_plot(title, 1000, homa_exp) 119 | if options.tcp: 120 | plot_slowdown(ax, tcp_exp, "p99", "TCP P99", color=tcp_color) 121 | plot_slowdown(ax, tcp_exp, "p50", "TCP P50", color=tcp_color2) 122 | if options.dctcp: 123 | plot_slowdown(ax, dctcp_exp, "p99", "DCTCP P99", color=dctcp_color) 124 | plot_slowdown(ax, dctcp_exp, "p50", "DCTCP P50", color=dctcp_color2) 125 | plot_slowdown(ax, homa_exp, "p99", "Homa P99", color=homa_color) 126 | plot_slowdown(ax, homa_exp, "p50", "Homa P50", color=homa_color2) 127 | ax.legend(loc="upper right", prop={'size': 9}) 128 | plt.tight_layout() 129 | plt.savefig("%s/reports/vs_tcp_%s.pdf" % (options.log_dir, workload)) 130 | 131 | # Generate CDF of small-message RTTs. 132 | log("Generating short message CDF for %s" % (workload)) 133 | if not options.skip_unloaded: 134 | unloaded_x, unloaded_y = get_short_cdf(unloaded_exp) 135 | homa_x, homa_y = get_short_cdf(homa_exp) 136 | if options.tcp: 137 | tcp_x, tcp_y = get_short_cdf(tcp_exp) 138 | if options.dctcp: 139 | dctcp_x, dctcp_y = get_short_cdf(dctcp_exp) 140 | start_cdf_plot(title, 10, 0.99e05, 1e-05, "RTT (usecs)", 141 | "Cumulative Fraction Short Messages") 142 | if options.tcp: 143 | plt.plot(tcp_x, tcp_y, label="TCP", color=tcp_color) 144 | if options.dctcp: 145 | plt.plot(dctcp_x, dctcp_y, label="DCTCP", color=dctcp_color) 146 | plt.plot(homa_x, homa_y, label="Homa", color=homa_color) 147 | if not options.skip_unloaded: 148 | plt.plot(unloaded_x, unloaded_y, label="Homa best case", 149 | color=unloaded_color) 150 | plt.legend(loc="upper right", prop={'size': 9}) 151 | plt.savefig("%s/reports/short_cdf_%s.pdf" % (options.log_dir, workload)) 152 | -------------------------------------------------------------------------------- /util/diff_metrics.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2018-2022 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | """ 7 | This program reads 2 Homa metrics files (/proc/net/homa_metrics) 8 | and prints out all of the statistics that have changed, in the 9 | same format as the original files. 10 | 11 | Usage: 12 | diff_metrics file1 file2 13 | """ 14 | 15 | from __future__ import division, print_function 16 | from glob import glob 17 | from optparse import OptionParser 18 | import math 19 | import os 20 | import re 21 | import string 22 | import sys 23 | 24 | # Contains values for all the metrics from the first file. Keys are 25 | # metric names, values are metric values. 26 | metrics = {} 27 | 28 | def scan_first(name): 29 | """ 30 | Scan the metrics file given by 'name' and record its metrics. 31 | """ 32 | global metrics 33 | f = open(name) 34 | 35 | for line in f: 36 | match = re.match('^([^ ]+) *([0-9]+) *(.*)', line) 37 | if not match: 38 | print("Didn't match: %s\n" % (line)) 39 | continue 40 | metrics[match.group(1)] = long(match.group(2)) 41 | f.close() 42 | 43 | def scan_second(name): 44 | """ 45 | Scan the metrics file given by 'name', compare its metrics to 46 | those that have been recorded, and print an output line with 47 | the difference, if there is any. 
48 | """ 49 | global metrics 50 | f = open(name) 51 | 52 | for line in f: 53 | match = re.match('^([^ ]+) *([0-9]+) *(.*)', line) 54 | if not match: 55 | print("Didn't match: %s\n" % (line)) 56 | continue 57 | name = match.group(1) 58 | value = long(match.group(2)) 59 | comment = match.group(3) 60 | if not name in metrics: 61 | print("No metric for %s\n" % (name)) 62 | continue 63 | # print("%s: %d %d\n" % (name, metrics[name], value)) 64 | diff = value - metrics[name] 65 | if diff == 0: 66 | continue 67 | print("%-22s %15lu %s" % (name, diff, comment)) 68 | f.close() 69 | 70 | if len(sys.argv) != 3: 71 | printf("Usage: %s file file2\n" % sys.argv[0]) 72 | exit(1) 73 | 74 | scan_first(sys.argv[1]) 75 | scan_second(sys.argv[2]) -------------------------------------------------------------------------------- /util/diff_rtts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | """ 7 | Compare two .rtts files to identify differences between them. 8 | 9 | Usage: diff_rtts.py file1 file2 10 | """ 11 | 12 | from __future__ import division, print_function 13 | from glob import glob 14 | from operator import itemgetter 15 | from optparse import OptionParser 16 | import math 17 | import os 18 | import re 19 | import string 20 | import sys 21 | 22 | def read_rtts(file): 23 | """ 24 | Read a .rtts file and returns a list of (length, slowdown) pairs. 
25 | 26 | file: Name of file to read 27 | """ 28 | 29 | slowdowns = [] 30 | f = open(file) 31 | for line in f: 32 | if line.startswith('#') or not line: 33 | continue 34 | match = re.match(' *([0-9]+) +([0-9.]+)', line) 35 | if not match: 36 | raise Exception("Malformed line in .rtts file: %s" % (line.rstrip())) 37 | length = int(match.group(1)) 38 | rtt = float(match.group(2)) 39 | 40 | # Optimal time (usecs) assumes 13 usec minimum, 25 Gbps network 41 | optimal = 13.0 + length*8/25000.0 42 | slowdown = rtt/optimal 43 | slowdowns.append([length, slowdown]) 44 | f.close() 45 | return slowdowns 46 | 47 | def avg_slowdown(slowdowns): 48 | """ 49 | Return average slowdown from a list of (length, slowdown) pairs. 50 | 51 | slowdowns: Input list 52 | """ 53 | sum = 0.0 54 | for item in slowdowns: 55 | sum += item[1] 56 | return sum/len(slowdowns) 57 | 58 | def deciles(slowdowns): 59 | """ 60 | Given a list of (length, slowdown) pairs, divide into 10 groups by 61 | length, then returns 6 lists (each with one entry per decile), 62 | containing: 63 | * largest length in the decile 64 | * P50 slowdown for the decile 65 | * P90 slowdown for the decile 66 | * P99 slowdown for the decile 67 | * P99.9 slowdown for the decile 68 | * max slowdown for the decile 69 | """ 70 | p50 = [] 71 | p90 = [] 72 | p99 = [] 73 | p999 = [] 74 | max = [] 75 | cutoffs = [] 76 | s = sorted(slowdowns, key = itemgetter(0)) 77 | for split in range(1, 11): 78 | split_start = len(s)*(split-1)//10 79 | split_end = len(s)*split//10 80 | decile = [] 81 | for i in range(split_start, split_end): 82 | decile.append(s[i][1]) 83 | cutoffs.append(s[split_end-1][0]) 84 | decile = sorted(decile) 85 | p50.append(decile[len(decile)//2]) 86 | p90.append(decile[len(decile)*9//10]) 87 | p99.append(decile[len(decile)*99//100]) 88 | p999.append(decile[len(decile)*999//1000]) 89 | max.append(decile[-1]) 90 | return cutoffs, p50, p90, p99, p999, max 91 | 92 | 93 | if len(sys.argv) != 3: 94 | print("Usage: diff_rtts.py 
file1 file2") 95 | exit(1) 96 | f1 = sys.argv[1] 97 | f2 = sys.argv[2] 98 | 99 | s1 = read_rtts(f1) 100 | print("Average slowdown in %s: %.1f" % (f1, avg_slowdown(s1))) 101 | 102 | s2 = read_rtts(sys.argv[2]) 103 | print("Average slowdown in %s: %.1f" % (f2, avg_slowdown(s2))) 104 | print("") 105 | 106 | c1, p50_1, p90_1, p99_1, p999_1, max_1 = deciles(s1) 107 | c2, p50_2, p90_2, p99_2, p999_2, max_2 = deciles(s2) 108 | 109 | out = "" 110 | for cutoff in c1: 111 | out += " %d" % (cutoff) 112 | print("Cutoffs for %s:%s" % (f1, out)) 113 | out = "" 114 | for cutoff in c2: 115 | out += " %d" % (cutoff) 116 | print("Cutoffs for %s:%s" % (f2, out)) 117 | print("") 118 | 119 | out = "" 120 | for val in p50_1: 121 | out += " %5.1f" % (val) 122 | print("P50s for %s:%s" % (f1, out)) 123 | out = "" 124 | for val in p50_2: 125 | out += " %5.1f" % (val) 126 | print("P50s for %s:%s" % (f2, out)) 127 | print("") 128 | 129 | out = "" 130 | for val in p90_1: 131 | out += " %5.1f" % (val) 132 | print("P90s for %s:%s" % (f1, out)) 133 | out = "" 134 | for val in p90_2: 135 | out += " %5.1f" % (val) 136 | print("P90s for %s:%s" % (f2, out)) 137 | print("") 138 | 139 | out = "" 140 | for val in p99_1: 141 | out += " %5.1f" % (val) 142 | print("P99s for %s:%s" % (f1, out)) 143 | out = "" 144 | for val in p99_2: 145 | out += " %5.1f" % (val) 146 | print("P99s for %s:%s" % (f2, out)) 147 | print("") 148 | 149 | out = "" 150 | for val in p999_1: 151 | out += " %5.1f" % (val) 152 | print("P99.9s for %s:%s" % (f1, out)) 153 | out = "" 154 | for val in p999_2: 155 | out += " %5.1f" % (val) 156 | print("P99.9s for %s:%s" % (f2, out)) 157 | print("") 158 | 159 | out = "" 160 | for val in max_1: 161 | out += " %5.1f" % (val) 162 | print("Maxes for %s:%s" % (f1, out)) 163 | out = "" 164 | for val in max_2: 165 | out += " %5.1f" % (val) 166 | print("Maxes for %s:%s" % (f2, out)) 167 | 168 | exit(0) -------------------------------------------------------------------------------- /util/dist.h: 
-------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2023 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /* This file contains information and supporting 6 | * functions for the workload distributions from the Homa paper. 7 | */ 8 | 9 | #ifndef _DIST_H 10 | #define _DIST_H 11 | 12 | #include 13 | #include 14 | 15 | /** 16 | * class @dist_point_gen: - Represents a CDF of message lengths and generates 17 | * randomized lengths according to that CDF. 18 | */ 19 | class dist_point_gen { 20 | public: 21 | dist_point_gen(const char* workload, size_t max_size, 22 | double min_bucket_frac = .0025, double max_size_ratio = 1.2); 23 | int operator()(std::mt19937 &rand_gen); 24 | double get_mean() const {return dist_mean;} 25 | double dist_overhead(int mtu) const; 26 | std::vector values() const; 27 | std::vector cdf_fractions() const; 28 | 29 | /** 30 | * struct cdf_point - Describes one point in a CDF of message lengths. 31 | */ 32 | struct cdf_point { 33 | /** @length: message length, in bytes. */ 34 | size_t length; 35 | 36 | /** 37 | * @fraction: fraction of all messages that are this size 38 | * or smaller. 39 | */ 40 | double fraction; 41 | 42 | cdf_point(size_t length, double fraction) 43 | : length(length), fraction(fraction) 44 | {} 45 | }; 46 | 47 | private: 48 | /** 49 | * @dist_points: collection of individual data points that 50 | * make up this CDF (in increasing order of length). 51 | */ 52 | std::vector dist_points; 53 | 54 | /** 55 | * @dist_mean: the average value of this distribution. 56 | */ 57 | double dist_mean; 58 | 59 | /** @uniform_dist: used to generate values in the range [0, 1).
*/ 60 | std::uniform_real_distribution uniform_dist; 61 | 62 | static int dist_msg_overhead(int length, int mtu); 63 | }; 64 | #endif /* _DIST_H */ 65 | -------------------------------------------------------------------------------- /util/dist_test.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "dist.h" 13 | #include "test_utils.h" 14 | 15 | /** 16 | * define HOMA_MAX_MESSAGE_LENGTH - Maximum bytes of payload in a Homa 17 | * request or response message. 18 | */ 19 | #define HOMA_MAX_MESSAGE_LENGTH 1000000 20 | 21 | /** @rand_gen: random number generator. */ 22 | static std::mt19937 rand_gen( 23 | std::chrono::system_clock::now().time_since_epoch().count()); 24 | 25 | /* This file tests the dist.cc/dist.h files and dist_point_gen class. It will 26 | * print the CDF for every generated length, a histogram to show how often each 27 | * length was generated, the sizes of the given distribution, and finally the 28 | * mean, range, and overhead of the distribution requested. 29 | * 30 | * Produced by: 31 | * ./dist_test workload [number of points] [max message length] 32 | * 33 | * @workload: - the distribution requested for the test. Can be workload 1-5 34 | * or a fixed distribution. 35 | * 36 | * @number_of_points: - the number of points that the dist_point_gen will 37 | * randomly generate for the test. (Default = 10). 38 | * 39 | * @max_message_length: - the maximum size of a message. 
40 | */ 41 | int main (int argc, char**argv) 42 | { 43 | int max_message_length = HOMA_MAX_MESSAGE_LENGTH; 44 | size_t num_points = 10; 45 | if (argc < 2) { 46 | fprintf(stderr, "Usage: %s workload [# points] [max_message_length]", 47 | argv[0]); exit(1); /* no workload given: argv[1] is NULL, so we cannot continue */ 48 | } 49 | if (argc > 3) { 50 | max_message_length = atoi(argv[3]); 51 | } 52 | if (argc > 2) { 53 | num_points = atoi(argv[2]); 54 | } 55 | 56 | dist_point_gen generator(argv[1], max_message_length); 57 | std::map hist; 58 | std::map cdf; 59 | 60 | uint64_t start = rdtsc(); 61 | for (size_t i = 0; i < 1'000'000; i++) { 62 | generator(rand_gen); 63 | } 64 | uint64_t end = rdtsc(); 65 | double avg_ns = double(end-start)/(get_cycles_per_sec()*1e-09)/1'000'000; 66 | 67 | for (size_t i = 0; i < num_points; i++) { 68 | hist[generator(rand_gen)]++; 69 | } 70 | 71 | int count = 0; 72 | for (std::map::const_iterator it = hist.begin(); 73 | it != hist.end(); ++it) { 74 | count += it->second; 75 | cdf[it->first] = count; 76 | } 77 | 78 | printf("\nCDF:\n"); 79 | for (const auto [key, val] : cdf) { 80 | printf("%7d %6.4f\n", key, val/num_points); 81 | } 82 | 83 | printf("\nHistogram:\n"); 84 | for (const auto [key, val] : hist) { 85 | printf("%d %d\n", key, val); 86 | } 87 | 88 | std::vector sizes = generator.values(); 89 | printf("\nSizes:\n"); 90 | for (const int num : sizes) { 91 | printf("%d\n", num); 92 | } 93 | 94 | printf("\nMean: %.1f\n", generator.get_mean()); 95 | printf("Range: min %d, max %d\n", hist.begin()->first, hist.rbegin()->first); 96 | printf("Overhead (1500B packets): %.3f\n", generator.dist_overhead(1500)); 97 | printf("Average time/sample for generator: %.1f ns\n", avg_ns); 98 | } -------------------------------------------------------------------------------- /util/dist_to_proto.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | #include "dist.h" 6 | extern "C" { 7 | #include
"homa.h" 8 | } 9 | #include "iostream" 10 | 11 | /** 12 | * This program takes one of the five workload distributions and converts 13 | * it into a fragment of a textformat protobuf used in distbench. It will first 14 | * merge buckets and truncate cdf_point sizes according to command line 15 | * arguments then write the cdf_points to stdout and the interval conversion 16 | * to stderr. 17 | * 18 | * Usage: 19 | * ./dist_to_proto workload [max message length] [min bucket frac] 20 | * [max size ratio] [gigabits per second] 21 | */ 22 | int main (int argc, char**argv) 23 | { 24 | int max_message_length = HOMA_MAX_MESSAGE_LENGTH; 25 | double min_bucket_frac = 0.0025; 26 | double max_size_ratio = 1.2; 27 | double gbps = 20.0; 28 | if (argc < 2) { 29 | fprintf(stderr, "Usage: %s workload [max message length] " 30 | "[min bucket frac] [max size ratio] [gbps]\n", 31 | argv[0]); 32 | exit(1); 33 | } 34 | if (argc > 2) { 35 | max_message_length = atoi(argv[2]); 36 | } 37 | if (argc > 3) { 38 | min_bucket_frac = std::stod(argv[3]); 39 | } 40 | if (argc > 4) { 41 | max_size_ratio = std::stod(argv[4]); 42 | } 43 | if (argc > 5) { 44 | gbps = std::stod(argv[5]); 45 | } 46 | 47 | dist_point_gen generator(argv[1], max_message_length, 48 | min_bucket_frac, max_size_ratio); 49 | std::vector values = generator.values(); 50 | std::vector fractions = generator.cdf_fractions(); 51 | 52 | for (size_t i = 0; i < values.size(); ++i) { 53 | printf(" cdf_points { value: %d, cdf: %20.19f }\n", 54 | values[i], fractions[i]); 55 | } 56 | 57 | /** 58 | * Convert average size to bits, then divide by gbps and round up to get 59 | * nanoseconds, then multiply by 2 because request size and response 60 | * size are equal 61 | */ 62 | double interval_ns = (std::ceil( (generator.get_mean() * 8.0) / gbps)) 63 | * 2; 64 | fprintf(stderr,"%.0f", interval_ns); 65 | } -------------------------------------------------------------------------------- /util/get_time_trace.c: 
-------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /** 6 | * This program will read timetrace information from the kernel and 7 | * dump it on stdout. Invoke with no parameters. 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "test_utils.h" 19 | 20 | #define BUF_SIZE 10000000 21 | char buffer[BUF_SIZE]; 22 | 23 | int main(int argc, char** argv) { 24 | // Fetch the time trace data from the kernel. 25 | int length = syscall(333, buffer, BUF_SIZE); 26 | if (length < 0) { 27 | printf("Error in get_timetrace: %s (%d)", 28 | strerror(errno), errno); 29 | return 1; 30 | } 31 | printf("Kernel returned timetrace with %d bytes\n", length); 32 | if (length == BUF_SIZE) { 33 | printf("Not enough space in buffer for complete timetrace.\n"); 34 | } 35 | buffer[length-1] = 0; 36 | 37 | double cps = get_cycles_per_sec(); 38 | printf("Cycles per second: %g\n", cps); 39 | 40 | // Scan through the records in the buffer. For each record, replace 41 | // the timestamp with more detailed information in ns, and output 42 | // the modified record. 
43 | char* current = buffer; 44 | uint64_t start_time, prev_time; 45 | start_time = 0; 46 | while (1) { 47 | char *stamp_end; 48 | double ns, delta_ns; 49 | // printf("Current text: %.50s", current); 50 | uint64_t stamp = strtoull(current, &stamp_end, 10); 51 | if (stamp == 0) { 52 | break; 53 | } 54 | if (start_time == 0) { 55 | start_time = stamp; 56 | prev_time = stamp; 57 | } 58 | ns = (1e09 * (double)(stamp - start_time)) / cps; 59 | delta_ns = (1e09 * (double)(stamp - prev_time)) / cps; 60 | printf("%8.1f ns (+%6.1f ns):", ns, delta_ns); 61 | 62 | for (current = stamp_end; 63 | (*current != 0) && (*current != '\n'); 64 | current++) { 65 | putc(*current, stdout); 66 | } 67 | putc('\n', stdout); 68 | prev_time = stamp; 69 | } 70 | return 0; 71 | } 72 | 73 | -------------------------------------------------------------------------------- /util/get_traces: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # Usage: 7 | # get_traces first last dst 8 | # 9 | # This script will retrieve node.tt timetraces from the home directory 10 | # of the nodes with numbers from first to last, inclusive, and store them 11 | # in files nodeN.tt in directory dst. 
12 | 13 | if [ $# -ne 3 ]; then 14 | echo "Usage: get_traces first last dst" 15 | exit 1 16 | fi 17 | first=$1 18 | last=$2 19 | dst=$3 20 | 21 | for ((i = $first ; i <= $last; i++)); do 22 | node=node$i 23 | echo $node 24 | mkdir -p $dst 25 | cl ssh $node cat node.tt > $dst/$node.tt 26 | done 27 | -------------------------------------------------------------------------------- /util/inc_tput.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2024 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /* This program measures the throughput of atomic increments in the face 6 | * of many concurrent cores invoking it. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | std::atomic_int value = 0; 20 | std::vector thread_counts; 21 | 22 | /** 23 | * rdtsc(): return the current value of the fine-grain CPU cycle counter 24 | * (accessed via the RDTSC instruction). 
25 | */ 26 | inline static uint64_t rdtsc(void) 27 | { 28 | uint32_t lo, hi; 29 | __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi)); 30 | return (((uint64_t)hi << 32) | lo); 31 | } 32 | 33 | void increment(int index) 34 | { 35 | while (1) { 36 | value.fetch_add(1); 37 | thread_counts[index]++; 38 | } 39 | } 40 | 41 | int main(int argc, char** argv) 42 | { 43 | int num_threads = 1; 44 | int i; 45 | std::vector old_counts; 46 | 47 | if (argc == 2) { 48 | char *end; 49 | num_threads = strtol(argv[1], &end, 0); 50 | if (*end != 0) { 51 | printf("Illegal argument %s: must be integer\n", 52 | argv[1]); 53 | exit(1); 54 | } 55 | } else if (argc != 1) { 56 | printf("Usage: %s [num_threads]\n", argv[0]); 57 | } 58 | 59 | for (i = 0; i < num_threads; i++) { 60 | thread_counts.emplace_back(0); 61 | old_counts.emplace_back(0); 62 | new std::thread(increment, i); 63 | } 64 | 65 | struct timeval prev_time, cur_time; 66 | gettimeofday(&prev_time, nullptr); 67 | uint64_t old_value = value; 68 | while (1) { 69 | sleep(1); 70 | gettimeofday(&cur_time, nullptr); 71 | uint64_t new_value = value; 72 | double diff = new_value - old_value; 73 | double secs = cur_time.tv_sec - prev_time.tv_sec; 74 | secs += 1e-6*(cur_time.tv_usec - prev_time.tv_usec); 75 | printf("%.2f Mops/sec [", (diff/secs)*1e-6); 76 | const char *sep = ""; 77 | for (i = 0; i < num_threads; i++) { 78 | int new_count = thread_counts[i]; 79 | diff = new_count - old_counts[i]; 80 | printf("%s%.2f", sep, (diff/secs)*1e-6); 81 | sep = " "; 82 | old_counts[i] = new_count; 83 | } 84 | printf("]\n"); 85 | prev_time = cur_time; 86 | old_value = new_value; 87 | } 88 | } -------------------------------------------------------------------------------- /util/plot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This file provides a library of functions for generating plots. 
7 | 8 | import matplotlib 9 | import matplotlib.pyplot as plt 10 | import os 11 | from pathlib import Path 12 | import re 13 | import string 14 | import sys 15 | 16 | from cperf import * 17 | 18 | # Standard colors for plotting 19 | color_green = '#00B000' 20 | color_blue = '#1759BB' 21 | color_red = '#d62728' 22 | tcp_color = '#00B000' 23 | tcp_color2 = '#5BD15B' 24 | tcp_color3 = '#96E296' 25 | homa_color = '#1759BB' 26 | homa_color2 = '#6099EE' 27 | homa_color3 = '#A6C6F6' 28 | dctcp_color = '#7A4412' 29 | dctcp_color2 = '#CB701D' 30 | dctcp_color3 = '#EAA668' 31 | unloaded_color = '#d62728' 32 | 33 | matplotlib.rc('mathtext', default='regular') 34 | 35 | # Dictionary containing all data that has been read from files so far. 36 | # Keys are file names, values are dictionaries of columns for that file, 37 | # in which keys are column names and values are lists of the values 38 | # in that column. 39 | file_data = {} 40 | 41 | def __read_file(file): 42 | """ 43 | Read a file and add its contents to the file_data variable. If the 44 | file has already been read, then this function does nothing. 45 | 46 | file: Path name of the file to read. Lines starting with '#' are 47 | considered comments and ignored, as are blank lines. Of the 48 | non-blank non-comment lines, the first contains space-separated 49 | column names, and the others contain data for those columns. 
50 | """ 51 | global file_data 52 | 53 | if file in file_data: 54 | return 55 | columns = {} 56 | names = None 57 | f = open(file) 58 | for line in f: 59 | fields = line.strip().split() 60 | if len(fields) == 0: 61 | continue 62 | if fields[0].startswith('#'): 63 | continue 64 | if not names: 65 | names = fields 66 | for n in names: 67 | if n in columns: 68 | print('Duplicate column name %s in %s' % (file, n), 69 | file=sys.stderr()) 70 | columns[n] = [] 71 | else: 72 | if len(fields) != len(names): 73 | print('Bad line in %s: %s (expected %d columns, got %d)' 74 | % (file, line.rstrip(), len(columns), len(fields)), 75 | file=sys.stderr) 76 | continue 77 | for i in range(0, len(names)): 78 | try: 79 | value = float(fields[i]) 80 | except ValueError: 81 | value = fields[i] 82 | columns[names[i]].append(value) 83 | f.close() 84 | file_data[file] = columns 85 | 86 | def get_column(file, column): 87 | """ 88 | Return a list containing the values of a given column in a given file. 89 | 90 | file: Path name of the file containing the desired column. 91 | column: Name of the column within that file. 92 | """ 93 | 94 | __read_file(file) 95 | if not column in file_data[file]: 96 | raise Exception('Column %s doesn\'t exist in %s' % (column, name)) 97 | return file_data[file][column] 98 | 99 | def get_column_names(file): 100 | """ 101 | Returns a list containing the names of all of the columns in file. 102 | """ 103 | 104 | __read_file(file) 105 | return file_data[file].keys() 106 | 107 | def get_numbers(file): 108 | """ 109 | Scans all of the column names in file for numbers and returns a 110 | sorted list of all the unique numbers found. 111 | """ 112 | 113 | numbers = set() 114 | for name in get_column_names(file): 115 | match = re.match('[^0-9]*([0-9]+)', name) 116 | if match: 117 | numbers.add(int(match.group(1))) 118 | return sorted(list(numbers)) 119 | 120 | def max_value(file, columns): 121 | """ 122 | Returns the largest value in a set of columns. 
123 | 124 | columns: A list of column names. 125 | """ 126 | 127 | overall_max = None 128 | for column in columns: 129 | col_max = max(get_column(file, column)) 130 | if (overall_max == None) or (col_max > overall_max): 131 | overall_max = col_max 132 | return overall_max 133 | 134 | def node_name(file): 135 | """ 136 | Given the name of a trace file, return a shorter name that can be 137 | used (e.g. in titles) to identify the node represented by the file. 138 | """ 139 | name = Path(file).stem 140 | i = name.rfind('_') 141 | if i != -1: 142 | name = name[i+1:] 143 | return name 144 | 145 | def start_plot(max_x, max_y, title="", x_label="", y_label="", size=10, 146 | figsize=[6,4]): 147 | """ 148 | Create a basic pyplot graph without plotting any data. Returns the 149 | Axes object for the plot. 150 | 151 | max_x: Maximum x-coordinate 152 | max_y: Maximum y-coordinate 153 | title: Title for the plot; empty means no title 154 | x_label: Label for x-axis 155 | y_label: Label for y-axis 156 | size: Size to use for fonts 157 | figsize: Dimensions of plot 158 | """ 159 | 160 | fig = plt.figure(figsize=figsize) 161 | ax = fig.add_subplot(111) 162 | if title != '': 163 | ax.set_title(title, size=size) 164 | ax.set_xlim(0, max_x) 165 | ax.set_ylim(1, max_y) 166 | if x_label: 167 | ax.set_xlabel(x_label, size=size) 168 | if y_label: 169 | ax.set_ylabel(y_label, size=size) 170 | return ax 171 | 172 | def plot_colors(file): 173 | """ 174 | Generates a test plot that shows the standard colors defined above. 175 | 176 | file: Name of PDF file in which to write the plot. 
177 | """ 178 | 179 | ax = start_plot(200, 100, title='Standard Colors') 180 | ax.plot([0, 200], [65, 65], color=color_green, label='color_green') 181 | ax.plot([0, 200], [60, 60], color=color_blue, label='color_blue') 182 | ax.plot([0, 200], [55, 55], color=color_red, label='color_red') 183 | ax.plot([0, 200], [50, 50], color=tcp_color, label='tcp_color') 184 | ax.plot([0, 200], [45, 45], color=tcp_color2, label='tcp_color2') 185 | ax.plot([0, 200], [40, 40], color=tcp_color3, label='tcp_color3') 186 | ax.plot([0, 200], [35, 35], color=homa_color, label='homa_color') 187 | ax.plot([0, 200], [30, 30], color=homa_color2, label='homa_color2') 188 | ax.plot([0, 200], [25, 25], color=homa_color3, label='homa_color3') 189 | ax.plot([0, 200], [20, 20], color=dctcp_color, label='dctcp_color') 190 | ax.plot([0, 200], [15, 15], color=dctcp_color2, label='dctcp_color2') 191 | ax.plot([0, 200], [10, 10], color=dctcp_color3, label='dctcp_color3') 192 | ax.plot([0, 200], [5, 5], color=unloaded_color, label='unloaded_color') 193 | ax.legend(loc='upper right', prop={'size': 9}) 194 | plt.tight_layout() 195 | plt.savefig(file) -------------------------------------------------------------------------------- /util/plot_tthoma.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2023 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | # This file provides a collection of functions that plot data generated 7 | # by tthoma.py. Invoke with the --help option for more information. 8 | 9 | from glob import glob 10 | from optparse import OptionParser 11 | import math 12 | import matplotlib 13 | import matplotlib.pyplot as plt 14 | import os 15 | from pathlib import Path 16 | import re 17 | import string 18 | import sys 19 | 20 | import plot 21 | 22 | def backlog(data_file, plot_file): 23 | """ 24 | Generates a plot of network backlog data produced by the "net" 25 | analyzer of tthoma.py. 
26 | 27 | data_file: Backlog data file generated by tthoma.py. 28 | plot_file: Name of the file in which to output a plot. 29 | """ 30 | global options 31 | 32 | cores = plot.get_numbers(data_file) 33 | if options.cores: 34 | cores = sorted(list(set(cores).intersection(options.cores))) 35 | columns = [] 36 | core_names = [] 37 | for core in cores: 38 | columns.append('Back%d' % core) 39 | core_names.append('C%02d' % core) 40 | times = plot.get_column(data_file, 'Time') 41 | xmax = max(times) 42 | ymax = plot.max_value(data_file, columns) 43 | 44 | ax = plot.start_plot(xmax, ymax, x_label='Time', 45 | y_label='KB In Flight For %s Cores' % (plot.node_name(data_file))) 46 | for i in range(len(columns)): 47 | ax.plot(times, plot.get_column(data_file, columns[i]), 48 | label=core_names[i], linewidth=0.8) 49 | ax.legend(loc='upper right', prop={'size': 9}) 50 | plt.tight_layout() 51 | plt.savefig(plot_file) 52 | 53 | 54 | def colors(plot_file): 55 | """ 56 | Generates a plot displaying standard colors. 57 | 58 | plot_file: Name of the file in which to output a plot. 59 | """ 60 | 61 | plot.plot_colors(plot_file) 62 | 63 | # Parse command-line options. 64 | parser = OptionParser(description= 65 | 'Reads data output by tthoma.py and generates a plot. func is ' 66 | 'the name of a function in this file, which will be invoked to ' 67 | 'generate a particular plot; args provide additional information to ' 68 | 'func if needed. 
Read the in-code documentation for the functions ' 69 | 'for details on what kinds of plots are available.', 70 | usage='%prog [options] func arg arg ...', 71 | conflict_handler='resolve') 72 | parser.add_option('--cores', dest='cores', default=None, 73 | metavar='CORES', help='space-separated list of integer core numbers; ' 74 | 'plots will include data from these cores only, where appropriate') 75 | (options, args) = parser.parse_args() 76 | 77 | if options.cores != None: 78 | options.cores = list(map(int, options.cores.split(" "))) 79 | 80 | if len(args) < 1: 81 | print('No func was specified') 82 | parser.print_help() 83 | exit(1) 84 | 85 | if not args[0] in locals(): 86 | print('There is no function %s' % (args[0])) 87 | parser.print_help() 88 | exit(1) 89 | 90 | locals()[args[0]](*args[1:]) -------------------------------------------------------------------------------- /util/receive_raw.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /* This is a test program that uses a raw socket to receive packets 6 | * on a given protocol and print their contents. 
7 | * 8 | * Usage: receive_raw [protocol] 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "../homa.h" 22 | 23 | int main(int argc, char** argv) { 24 | int fd; 25 | int protocol; 26 | ssize_t size; 27 | #define BUF_SIZE 2000 28 | char buffer[BUF_SIZE]; 29 | struct ip* ip_header = (struct ip *) buffer; 30 | int header_length; 31 | 32 | if (argc >= 2) { 33 | protocol = strtol(argv[1], NULL, 10); 34 | if (protocol == 0) { 35 | printf("Bad protocol number %s; must be integer\n", 36 | argv[3]); 37 | exit(1); 38 | } 39 | } else { 40 | protocol = IPPROTO_HOMA; 41 | } 42 | 43 | fd = socket(AF_INET, SOCK_RAW, protocol); 44 | if (fd < 0) { 45 | printf("Couldn't open raw socket: %s\n", strerror(errno)); 46 | exit(1); 47 | } 48 | 49 | while (1) { 50 | size = recvfrom(fd, buffer, BUF_SIZE, 0, NULL, 0); 51 | if (size < 0) { 52 | printf("Error receiving packet: %s\n", strerror(errno)); 53 | exit(1); 54 | } 55 | header_length = 4 * ip_header->ip_hl; 56 | // printf("IP header length: %d bytes\n", header_length); 57 | buffer[size] = 0; 58 | printf("%s\n", buffer + header_length); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /util/scratch.c: -------------------------------------------------------------------------------- 1 | // This is a scratch file used for writing temporary code to test 2 | // how it works. It has no long term value. 
3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define print_type(name) printf("%s has type '%s'\n", #name, "__typeof__(name)"); 11 | 12 | int main(int argc, char** argv) { 13 | // int value; 14 | // if (argc >= 2) { 15 | // sscanf(argv[1], "%x", &value); 16 | // } else { 17 | // value = 0x12345; 18 | // } 19 | 20 | uint64_t x = 0x1234500001; 21 | uint64_t y = (x + 63) & ~0x3f; 22 | printf("x: %lx, y: %lx\n", x, y); 23 | return 0; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /util/send_many: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Repeatedly invoke sendHoma (see if the system hangs because of 3 | # sk_buff exhaustion?) 4 | 5 | count=1 6 | while : 7 | do 8 | ./homaSend rc71 "Test message #$count" 9 | echo "Sent message #$count" 10 | count=$((count+1)) 11 | done -------------------------------------------------------------------------------- /util/send_raw.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /* This is a test program that will send a packet to a given 6 | * IP protocol, with given contents. 
7 | * 8 | * Usage: send_raw hostName contents [protocol] 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "homa.h" 21 | #include "test_utils.h" 22 | 23 | int main(int argc, char** argv) { 24 | int fd, status; 25 | struct addrinfo *result; 26 | struct addrinfo hints; 27 | char *message; 28 | char *host; 29 | int protocol; 30 | union sockaddr_in_union *addr; 31 | uint8_t *bytes; 32 | 33 | if (argc < 3) { 34 | printf("Usage: %s hostName contents [protocol]\n", argv[0]); 35 | exit(1); 36 | } 37 | host = argv[1]; 38 | message = argv[2]; 39 | if (argc >= 4) { 40 | protocol = strtol(argv[3], NULL, 10); 41 | if (protocol == 0) { 42 | printf("Bad protocol number %s; must be integer\n", 43 | argv[3]); 44 | exit(1); 45 | } 46 | } else { 47 | protocol = IPPROTO_HOMA; 48 | } 49 | 50 | memset(&hints, 0, sizeof(struct addrinfo)); 51 | hints.ai_family = AF_INET; 52 | hints.ai_socktype = SOCK_DGRAM; 53 | status = getaddrinfo(host, "80", &hints, &result); 54 | if (status != 0) { 55 | printf("Couldn't look up address for %s: %s\n", 56 | host, gai_strerror(status)); 57 | exit(1); 58 | } 59 | addr = (union sockaddr_in_union*) result->ai_addr; 60 | bytes = (uint8_t *) &addr->in4.sin_addr; 61 | printf("Destination address: %x (%d.%d.%d.%d)\n", addr->in4.sin_addr.s_addr, 62 | bytes[0], bytes[1], bytes[2], bytes[3]); 63 | 64 | fd = socket(AF_INET, SOCK_RAW, protocol); 65 | if (fd < 0) { 66 | printf("Couldn't open raw socket: %s\n", strerror(errno)); 67 | exit(1); 68 | } 69 | 70 | status = sendto(fd, message, strlen(message), 0, result->ai_addr, 71 | result->ai_addrlen); 72 | if (status < 0) { 73 | printf("Error in sendto: %s\n", strerror(errno)); 74 | } else { 75 | printf("Sendto succeeded\n"); 76 | } 77 | exit(0); 78 | } 79 | 80 | -------------------------------------------------------------------------------- /util/smi.cc: 
-------------------------------------------------------------------------------- 1 | /* Copyright (c) 2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /* This program spawns a collection of threads on different cores to 6 | * detect SMI interrupts, during which all of the cores are simultaneously 7 | * paused. It outputs information about the frequency and length of the 8 | * SMIs. 9 | * 10 | * Usage: 11 | * smi core core ... 12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | 21 | #include "test_utils.h" 22 | 23 | #define usecs(x) (to_seconds(x)*1e06) 24 | 25 | #define ms(x) (to_seconds(x)*1e03) 26 | 27 | /** 28 | * Holds information about gaps for a single thread (periods of time when 29 | * that thread was not executing). 30 | */ 31 | #define MAX_GAPS 1000 32 | struct thread_gaps { 33 | /* Index in gaps of next gap to fill in. */ 34 | int next; 35 | 36 | /* Used only when scanning: current gap being considered. */ 37 | int current; 38 | 39 | struct { 40 | uint64_t start; 41 | uint64_t end; 42 | } gaps[MAX_GAPS]; 43 | 44 | thread_gaps() 45 | : next(0), current(0), gaps() 46 | {} 47 | }; 48 | 49 | /** 50 | * Used to collect information about identified gaps, in order to find 51 | * previous gaps of about the same duration. 52 | */ 53 | struct prev_gap { 54 | /* Starting time for the gap. */ 55 | uint64_t start; 56 | 57 | /* How long it lasted, in rdtsc units. */ 58 | uint64_t duration; 59 | 60 | prev_gap(uint64_t start, uint64_t duration) 61 | : start(start), duration(duration) 62 | {} 63 | }; 64 | 65 | /* Minimum length (in rdtsc cycles) for a gap to be considered meaningful. */ 66 | uint64_t min_gap_length; 67 | 68 | /** 69 | * record_gaps() - Loop infinitely, recording info about execution gaps, 70 | * until gaps is full. 71 | * @gaps: Structure to fill in with gap information. 72 | * @core: Core on which to run.
73 | */ 74 | void record_gaps(struct thread_gaps *gaps, int core) 75 | { 76 | pin_thread(core); 77 | // printf("Pinned thread to core %d\n", core); 78 | while (gaps->next < MAX_GAPS) { 79 | uint64_t start, end; 80 | start = rdtsc(); 81 | while (1) { 82 | end = rdtsc(); 83 | if ((end - start) >= min_gap_length) { 84 | break; 85 | } 86 | start = end; 87 | } 88 | gaps->gaps[gaps->next].start = start; 89 | gaps->gaps[gaps->next].end = end; 90 | gaps->next++; 91 | } 92 | } 93 | 94 | int main(int argc, char** argv) { 95 | std::vector cores; 96 | int i, num_cores; 97 | uint64_t time0; 98 | 99 | /* Minimum gap is 1 usec. */ 100 | min_gap_length = static_cast(get_cycles_per_sec())/1000000; 101 | 102 | if ((argc == 2) && (strcmp(argv[1], "--help") == 0)) { 103 | printf("Usage: smi [core core ...]\n"); 104 | printf("With no arguments, runs on a preset group of cores\n"); 105 | exit(0); 106 | } 107 | 108 | for (i = 1; i < argc; i++) { 109 | char *end; 110 | int core = strtol(argv[i], &end, 10); 111 | if ((*end != 0) || (core < 0)) { 112 | fprintf(stderr, "Bad core number %s: must be positive " 113 | "integer\n", argv[i]); 114 | exit(1); 115 | } /* Remember each validated core; without this the command-line list is ignored and the preset cores below are always used. */ cores.push_back(core); 116 | } 117 | if (cores.empty()) { 118 | for (i = 0; i < 10; i++) { 119 | cores.push_back(i); 120 | } 121 | } 122 | num_cores = static_cast(cores.size()); 123 | 124 | time0 = rdtsc(); 125 | std::vector thread_gaps; 126 | std::vector threads; 127 | for (int core: cores) { 128 | struct thread_gaps *g = new struct thread_gaps; 129 | thread_gaps.push_back(g); 130 | threads.emplace_back(record_gaps, g, core); 131 | } 132 | for (i = 0; i < num_cores; i++) { 133 | threads[i].join(); 134 | } 135 | uint64_t overlap = rdtsc() - time0; 136 | printf("Each line gives the starting time for a gap, plus the elapsed\n"); 137 | printf("time since the previous gap of a similar duration.\n"); 138 | 139 | /* Each iteration through this loop checks to see if the current 140 | * gaps from all of the cores are concurrent. If so, it records 141 | * that gap.
Otherwise, it discards the oldest gap. 142 | */ 143 | uint64_t total_gaps = 0; 144 | int num_gaps = 0; 145 | std::vector found; 146 | while (true) { 147 | int oldest = 0; 148 | uint64_t oldest_start = 0, latest_start = 0, earliest_end = 0; 149 | for (i = 0; i < num_cores; i++) { 150 | struct thread_gaps *gaps = thread_gaps[i]; 151 | if (gaps->current >= MAX_GAPS) { 152 | goto done; 153 | } 154 | uint64_t start = gaps->gaps[gaps->current].start; 155 | uint64_t end = gaps->gaps[gaps->current].end; 156 | // printf("Gap on core %d [%d]: %.1f .. %.1f\n", i, 157 | // gaps->current, usecs(start - time0), 158 | // usecs(end - time0)); 159 | if (i == 0) { 160 | oldest = 0; 161 | oldest_start = start; 162 | latest_start = start; 163 | earliest_end = end; 164 | } else { 165 | if (start < oldest_start) { 166 | oldest = i; 167 | oldest_start = start; 168 | } 169 | if (start > latest_start) { 170 | latest_start = start; 171 | } 172 | if (end < earliest_end) { 173 | earliest_end = end; 174 | } 175 | } 176 | } 177 | uint64_t overlap = (earliest_end > latest_start) 178 | ? earliest_end - latest_start : 0; 179 | // printf("latest_start %.1f, earliest_end %.1f, overlap %.1f\n", 180 | // usecs(latest_start - time0), 181 | // usecs(earliest_end - time0), 182 | // usecs(overlap)); 183 | if (overlap >= min_gap_length ) { 184 | /* We have a consistent gap across all cores. */ 185 | num_gaps++; 186 | total_gaps += overlap; 187 | 188 | /* Find the most recent event of similar duration. 
*/ 189 | uint64_t prev_start = time0; 190 | for (int j = static_cast(found.size())-1; 191 | j >= 0; j--) { 192 | // printf("Checking found[%d]: start %.1f ms, duration %.1f us\n", 193 | // j, ms(found[j].start - time0), 194 | // usecs(found[j].duration)); 195 | uint64_t prev = found[j].duration; 196 | uint64_t delta = prev; 197 | if (overlap < delta) { 198 | delta = overlap; 199 | } 200 | delta = delta/4; 201 | // printf("prev %lu, overlap %lu, delta %lu\n", 202 | // prev, overlap, delta); 203 | if (((prev + delta) >= overlap) 204 | && ((overlap + delta) >= prev)) { 205 | prev_start = found[j].start; 206 | break; 207 | } 208 | } 209 | found.emplace_back(latest_start, overlap); 210 | printf("%5.1f ms [+%5.1f ms] gap of %.1f usec\n", 211 | ms(latest_start - time0), 212 | ms(latest_start - prev_start), 213 | usecs(overlap)); 214 | for (i = 0; i < num_cores; i++) { 215 | thread_gaps[i]->current++; 216 | } 217 | } else { 218 | /* Nothing consistent; drop the oldest gap. */ 219 | // printf("Dropping gap %d of core %d\n", 220 | // thread_gaps[oldest]->current, oldest); 221 | thread_gaps[oldest]->current++; 222 | } 223 | } 224 | done: 225 | printf("%d gaps (every %.1f ms), total gap time %.1f usec (%.2f%% of all time)\n", 226 | num_gaps, (usecs(overlap)/1000)/num_gaps, 227 | usecs(total_gaps), 228 | 100.0*usecs(total_gaps)/usecs(overlap)); 229 | exit(0); 230 | } 231 | -------------------------------------------------------------------------------- /util/smi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | """ 4 | Scans a timetrace looking for long gaps where no cores have any events 5 | (probably because of System Management Interrupts) 6 | Usage: ttsmi.py [tt_file] 7 | 8 | The existing timetrace is in tt_file (or stdin in tt_file is omitted). 
9 | """ 10 | 11 | from __future__ import division, print_function 12 | from glob import glob 13 | from optparse import OptionParser 14 | import math 15 | import os 16 | import re 17 | import string 18 | import sys 19 | 20 | if len(sys.argv) == 2: 21 | f = open(sys.argv[1]) 22 | elif len(sys.argv) == 1: 23 | f = sys.stdin 24 | else: 25 | print("Usage: %s [tt_file]" % (sys.argv[0])) 26 | sys.exit(1) 27 | 28 | prev_time = 0 29 | printed = 0 30 | 31 | for line in f: 32 | match = re.match(' *([-0-9.]+) us .* \[C([0-9]+)\]', line) 33 | if not match: 34 | continue 35 | time = float(match.group(1)) 36 | core = int(match.group(2)) 37 | 38 | if (time - prev_time) > 150: 39 | print(line.rstrip()) 40 | printed += 1 41 | if printed >= 5: 42 | exit(0) 43 | 44 | prev_time = time 45 | -------------------------------------------------------------------------------- /util/test_time_trace.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | /* This program exercises the Linux kernel time trace mechanism 6 | * by calling a new system call that creates time traces. 
7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | int main(int argc, char** argv) { 18 | int i; 19 | printf("Invoking new 'test_timetrace' syscall.\n"); 20 | for (i = 0; i < 100; i++) { 21 | int status = syscall(334); 22 | if (status < 0) { 23 | printf(" Error in test_timetrace: %s (%d)", 24 | strerror(errno), errno); 25 | } 26 | } 27 | return 0; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /util/test_utils.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | #ifndef _TEST_UTILS_H 6 | #define _TEST_UTILS_H 7 | 8 | #include 9 | 10 | #ifdef __cplusplus 11 | #include 12 | #include 13 | #endif 14 | 15 | #include "homa.h" 16 | 17 | #ifdef __cplusplus 18 | extern "C" 19 | { 20 | #endif 21 | 22 | /** 23 | * Holds either an IPv4 or IPv6 address (smaller and easier to use than 24 | * sockaddr_storage). 25 | */ 26 | union sockaddr_in_union { 27 | struct sockaddr sa; 28 | struct sockaddr_in in4; 29 | struct sockaddr_in6 in6; 30 | }; 31 | 32 | /** 33 | * sockaddr_size() - Return the number of bytes used by the argument. 34 | * @sa: Pointer to either an IPv4 or an IPv6 address. 35 | */ 36 | static inline uint32_t sockaddr_size(const struct sockaddr *sa) 37 | { 38 | return (sa->sa_family == AF_INET) ? 
sizeof(struct sockaddr_in) : 39 | sizeof(struct sockaddr_in6); 40 | } 41 | 42 | #define sizeof32(type) static_cast(sizeof(type)) 43 | 44 | extern int check_buffer(void *buffer, size_t length); 45 | extern int check_message(struct homa_recvmsg_args *control, 46 | char *region, size_t length, int skip); 47 | extern double get_cycles_per_sec(); 48 | extern int get_int(const char *s, const char *msg); 49 | extern void pin_thread(int core); 50 | extern const char* 51 | print_address(const union sockaddr_in_union *addr); 52 | extern void print_dist(uint64_t times[], int count); 53 | extern void seed_buffer(void *buffer, size_t length, int seed); 54 | #ifdef __cplusplus 55 | extern void split(const char *s, char sep, std::vector &dest); 56 | #endif 57 | extern double to_seconds(uint64_t cycles); 58 | 59 | /** 60 | * rdtsc(): return the current value of the fine-grain CPU cycle counter 61 | * (accessed via the RDTSC instruction). 62 | */ 63 | inline static uint64_t rdtsc(void) 64 | { 65 | uint32_t lo, hi; 66 | __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi)); 67 | return (((uint64_t)hi << 32) | lo); 68 | } 69 | 70 | #ifdef __cplusplus 71 | } 72 | #endif 73 | 74 | #endif /* _TEST_UTILS_H */ 75 | -------------------------------------------------------------------------------- /util/time_trace.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020-2022 Homa Developers 2 | * SPDX-License-Identifier: BSD-1-Clause 3 | */ 4 | 5 | #ifndef TIMETRACE_H 6 | #define TIMETRACE_H 7 | 8 | #include 9 | #include 10 | 11 | #include "test_utils.h" 12 | 13 | // Change 1 -> 0 in the following line to disable time tracing globally. 14 | #define ENABLE_TIME_TRACE 1 15 | 16 | /** 17 | * class time_trace - Implements a circular buffer of entries, each of which 18 | * consists of a fine-grain timestamp, a short descriptive string, and 19 | * a few additional values. 
It's typically used to record times at 20 | * various points in an operation, in order to find performance bottlenecks. 21 | * It can record a trace relatively efficiently (< 10ns as of 7/2020), 22 | * and then either return the trace as a string or print it to 23 | * the system log. 24 | * 25 | * This class is thread-safe. By default, trace information is recorded 26 | * separately for each thread in order to avoid synchronization and cache 27 | * consistency overheads; the thread-local traces are merged when the 28 | * timetrace is printed, so the existence of multiple trace buffers is 29 | * normally invisible. 30 | * 31 | * The time_trace class should never be constructed; it offers only 32 | * static methods. 33 | * 34 | * If you want to use a single trace buffer rather than per-thread 35 | * buffers, see the nested class time_trace::buffer below. 36 | */ 37 | class time_trace { 38 | public: 39 | static void cleanup(); 40 | static void freeze(); 41 | static std::string get_trace(); 42 | static int print_to_file(const char *name); 43 | 44 | /** @frozen: nonzero means that the timetrace is already frozen. */ 45 | static int frozen; 46 | 47 | protected: 48 | class buffer; 49 | 50 | /** 51 | * @tb: points to a private per-thread time_trace::buffer; 52 | * NULL means no such object exists for the current thread. 53 | */ 54 | static __thread buffer* tb; 55 | 56 | /** 57 | * @thread_buffers: holds pointers to all of the existing thread-private 58 | * buffers. Entries get deleted only by free_unused. 59 | */ 60 | static std::vector thread_buffers; 61 | 62 | public: 63 | 64 | /** 65 | * record() - Record an event in a thread-local buffer. 66 | * @timestamp: The time at which the event occurred. 67 | * @format: A format string for snprintf that will be used, along 68 | * with arg0..arg3, to generate a human-readable message 69 | * describing what happened, when the time trace is printed.
70 | * The message is generated by calling snprintf as follows: 71 | * snprintf(buffer, size, format, arg0, arg1, arg2, arg3) 72 | * where format and arg0..arg3 are the corresponding 73 | * arguments to this method. This pointer is stored in the 74 | * time trace, so the caller must ensure that its contents 75 | * will not change over its lifetime in the trace. 76 | * @arg0: Argument to use when printing a message about this event. 77 | * @arg1: Argument to use when printing a message about this event. 78 | * @arg2: Argument to use when printing a message about this event. 79 | * @arg3: Argument to use when printing a message about this event. 80 | */ 81 | static inline void record(uint64_t timestamp, const char* format, 82 | uint32_t arg0 = 0, uint32_t arg1 = 0, 83 | uint32_t arg2 = 0, uint32_t arg3 = 0) { 84 | #if ENABLE_TIME_TRACE 85 | tb->record(timestamp, format, arg0, arg1, arg2, arg3); 86 | #endif 87 | } 88 | static inline void record(const char* format, uint32_t arg0 = 0, 89 | uint32_t arg1 = 0, uint32_t arg2 = 0, uint32_t arg3 = 0) { 90 | #if ENABLE_TIME_TRACE 91 | record(rdtsc(), format, arg0, arg1, arg2, arg3); 92 | #endif 93 | } 94 | 95 | protected: 96 | time_trace(); 97 | static void print_internal(std::string* s, FILE *f); 98 | 99 | /** struct event - Holds one entry in a time_trace::buffer. */ 100 | struct event { 101 | /* See documentation for record method. */ 102 | uint64_t timestamp; 103 | const char* format; 104 | uint32_t arg0; 105 | uint32_t arg1; 106 | uint32_t arg2; 107 | uint32_t arg3; 108 | }; 109 | 110 | /** 111 | * class buffer - Represents a sequence of events generated by a single 112 | * thread. Has a fixed capacity, so slots are re-used on a circular 113 | * basis. This class is not thread-safe. 
114 | */ 115 | class buffer { 116 | public: 117 | buffer(std::string name); 118 | ~buffer(); 119 | void record(uint64_t timestamp, const char* format, 120 | uint32_t arg0 = 0, uint32_t arg1 = 0, 121 | uint32_t arg2 = 0, uint32_t arg3 = 0); 122 | void reset(); 123 | 124 | public: 125 | /** @name: name that identifies this buffer/thread. */ 126 | std::string name; 127 | 128 | /** 129 | * @BUFFER_SIZE_EXP: determines the number of events we can 130 | * retain, as an exponent of 2. 131 | */ 132 | static const uint8_t BUFFER_SIZE_EXP = 16; 133 | 134 | /** 135 | * @BUFFER_SIZE: total number of events that we can retain 136 | * at any given time. 137 | */ 138 | static const uint32_t BUFFER_SIZE = 1 << BUFFER_SIZE_EXP; 139 | 140 | /** 141 | * @BUFFER_MASK: bit mask used to implement a circular event buffer. 142 | */ 143 | static const uint32_t BUFFER_MASK = BUFFER_SIZE - 1; 144 | 145 | /** 146 | * @next_index: index within events of the slot to use for the next 147 | * call to record. 148 | */ 149 | int next_index; 150 | 151 | /** 152 | * @ref_count: number of thread_buffer objects that reference 153 | * this buffer. When this count becomes 0, the buffer can be 154 | * deleted in the next call to time_trace::cleanup. 155 | */ 156 | int ref_count; 157 | 158 | /** 159 | * @events: Holds information from the most recent calls to record. 160 | */ 161 | time_trace::event events[BUFFER_SIZE]; 162 | 163 | friend class time_trace; 164 | }; 165 | 166 | public: 167 | /** 168 | * class thread_buffer - One of these should be instantiated as a 169 | * local variable in the top-level function for each thread that 170 | * invokes tt. Such a variable ensures that a buffer is available 171 | * for the lifetime of that thread. 172 | */ 173 | class thread_buffer { 174 | public: 175 | thread_buffer(std::string name); 176 | ~thread_buffer(); 177 | 178 | protected: 179 | /* The buffer associated with this thread. Malloc-ed. The 180 | * "official" reference to this is the one in thread_buffers. 
181 | */ 182 | time_trace::buffer *buffer; 183 | }; 184 | }; 185 | 186 | #define tt time_trace::record 187 | 188 | #endif // TIMETRACE_H 189 | 190 | -------------------------------------------------------------------------------- /util/tput.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | """ 4 | Analyzes throughput of message arrivals in a timetrace. 5 | Usage: tput.py [--verbose] [tt_file] 6 | 7 | The existing timetrace is in tt_file (or stdin in tt_file is omitted). 8 | """ 9 | 10 | from __future__ import division, print_function 11 | from glob import glob 12 | from optparse import OptionParser 13 | import math 14 | import os 15 | import re 16 | import string 17 | import sys 18 | 19 | verbose = False 20 | if (len(sys.argv) >= 2) and (sys.argv[1] == "--verbose"): 21 | verbose = True 22 | sys.argv.pop(1) 23 | if len(sys.argv) == 2: 24 | f = open(sys.argv[1]) 25 | elif len(sys.argv) == 1: 26 | f = sys.stdin 27 | else: 28 | print("Usage: %s [tt_file]" % (sys.argv[0])) 29 | sys.exit(1) 30 | 31 | # Keys are RPC ids, values are dictionaries containing the following fields: 32 | # start: time offset 0 received 33 | # grant: time the first grant was sent 34 | # grant_offset: offset in last data packet after first grant 35 | # end: time last packet received 36 | # offset: highest offset in any packet received for the RPC 37 | rpcs = {} 38 | 39 | for line in f: 40 | match = re.match(' *([-0-9.]+) us .* \[C([0-9]+)\]', line) 41 | if not match: 42 | continue 43 | time = float(match.group(1)) 44 | core = match.group(2) 45 | 46 | match = re.match('.*sending grant for id ([0-9]+)', 47 | line) 48 | if match: 49 | id = match.group(1) 50 | if id in rpcs and not 'grant' in rpcs[id]: 51 | rpcs[id]['grant'] = time 52 | rpcs[id]['grant_offset'] = rpcs[id]['offset'] 53 | 54 | match = re.match('.*homa_gro_receive got packet .* id ([0-9]+), ' 55 | 'offset ([0-9]+)', line) 56 | if match: 57 | id = match.group(1) 58 | 
offset = int(match.group(2)) 59 | if (not id in rpcs) and (offset == 0): 60 | rpcs[id] = {'offset': 0, 'start': time} 61 | if id in rpcs: 62 | rpcs[id]['end'] = time 63 | if offset > rpcs[id]['offset']: 64 | rpcs[id]['offset'] = offset 65 | 66 | match = re.match('.*incoming data packet, id ([0-9]+), .* offset ' 67 | '([0-9]+)/([0-9]+)', line) 68 | if match: 69 | id = match.group(1) 70 | length = int(match.group(3)) 71 | if id in rpcs: 72 | rpcs[id]['length'] = length 73 | 74 | total_bytes = 0 75 | total_bytes2 = 0 76 | total_time = 0 77 | total_time2 = 0 78 | tputs = [] 79 | tputs2 = [] 80 | for id in sorted(rpcs.keys()): 81 | rpc = rpcs[id] 82 | if (not 'start' in rpc) or (not 'end' in rpc): 83 | continue 84 | if rpc['offset'] < 300000: 85 | continue 86 | bytes = rpc['offset'] 87 | time = rpc['end'] - rpc['start'] 88 | tput = bytes*8.0/time/1000 89 | tputs.append(tput) 90 | total_bytes += bytes 91 | total_time += time 92 | 93 | # Compute separate statistics for throughput after sending the first 94 | # grant (this eliminates time waiting for the message to become highest 95 | # priority) 96 | if 'grant' in rpc: 97 | bytes2 = rpc['offset'] - rpc['grant_offset'] 98 | time2 = rpc['end'] - rpc['grant'] 99 | tput2 = bytes2*8.0/time2/1000 100 | tputs2.append(tput2) 101 | total_bytes2 += bytes2 102 | total_time2 += time2 103 | 104 | if verbose: 105 | print("%9.3f: id %s, grant at %9.3f, offset grant_offset %d, " 106 | "last_offset %d at %9.3f, tput %.1f, tput2 %.1f" % ( 107 | rpc['start'], id, rpc['grant'], rpc['grant_offset'], rpc['offset'], 108 | rpc['end'], tput, tput2)) 109 | 110 | tputs.sort() 111 | if verbose: 112 | print("") 113 | print("Messages >= 300KB: %d" % (len(tputs))) 114 | print("Entire messages:") 115 | print("Minimum tput: %4.1f Gbps" % (tputs[0])) 116 | print("Median tput: %4.1f Gbps" % (tputs[len(tputs)//2])) 117 | print("P90 tput: %4.1f Gbps" % (tputs[len(tputs)*9//10])) 118 | print("P99 tput: %4.1f Gbps" % (tputs[len(tputs)*99//100])) 119 | 
print("Maximum tput: %4.1f Gbps" % (tputs[-1])) 120 | print("Average tput: %4.1f Gbps" % (total_bytes*8.0/total_time/1000)) 121 | 122 | tputs2.sort() 123 | print("\nMessage data after first grant:") 124 | print("Minimum tput: %4.1f Gbps" % (tputs2[0])) 125 | print("Median tput: %4.1f Gbps" % (tputs2[len(tputs2)//2])) 126 | print("P90 tput: %4.1f Gbps" % (tputs2[len(tputs2)*9//10])) 127 | print("P99 tput: %4.1f Gbps" % (tputs2[len(tputs2)*99//100])) 128 | print("Maximum tput: %4.1f Gbps" % (tputs2[-1])) 129 | print("Average tput: %4.1f Gbps" % (total_bytes2*8.0/total_time2/1000)) 130 | -------------------------------------------------------------------------------- /util/ttgrep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019-2022 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | """ 7 | Scan the time trace data in a log file; find all records containing 8 | a given string, and output only those records. If the --rebase argument 9 | is present, times are offset so the first event is at time 0. If the file 10 | is omitted, standard input is used. 11 | Usage: ttgrep.py [--rebase] string [file] 12 | """ 13 | 14 | from __future__ import division, print_function 15 | from glob import glob 16 | from optparse import OptionParser 17 | import math 18 | import os 19 | import re 20 | import string 21 | import sys 22 | 23 | rebase = False 24 | 25 | def scan(f, string): 26 | """ 27 | Scan the log file given by 'f' (handle for an open file) and output 28 | all-time trace records containing string. 
29 | """ 30 | global rebase 31 | startTime = 0.0 32 | prevTime = 0.0 33 | writes = 0 34 | for line in f: 35 | match = re.match(' *([-0-9.]+) us \(\+ *([0-9.]+) us\) (.*)', 36 | line) 37 | if not match: 38 | continue 39 | time = float(match.group(1)) 40 | interval = float(match.group(2)) 41 | event = match.group(3) 42 | if (string not in event) and ("Freez" not in event): 43 | continue 44 | if startTime == 0.0: 45 | startTime = time 46 | prevTime = time 47 | if rebase: 48 | printTime = time - startTime 49 | else: 50 | printTime = time 51 | print("%9.3f us (+%8.3f us) %s" % (printTime, 52 | time - prevTime, event)) 53 | prevTime = time 54 | 55 | if (len(sys.argv) > 1) and (sys.argv[1] == "--rebase"): 56 | rebase = True 57 | del sys.argv[1] 58 | 59 | f = sys.stdin 60 | if len(sys.argv) == 3: 61 | f = open(sys.argv[2]) 62 | elif len(sys.argv) != 2: 63 | print("Usage: %s [--rebase] string [logFile]" % (sys.argv[0])) 64 | sys.exit(1) 65 | 66 | scan(f, sys.argv[1]) -------------------------------------------------------------------------------- /util/ttmerge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019-2022 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | """ 7 | Merge two or more timetraces into a single trace. All of the traces 8 | must use the same time source. 9 | Usage: ttmerge.py file file file ... 
10 | """ 11 | 12 | from __future__ import division, print_function 13 | from glob import glob 14 | import math 15 | from optparse import OptionParser 16 | import os 17 | import re 18 | import string 19 | import sys 20 | 21 | # Each entry in the following list describes one file; it is a dictionary 22 | # with the following fields: 23 | # name: Name of the file 24 | # f: Open file for reading 25 | # ghz: Clock rate assumed for this file 26 | # first: Timestamp of first entry 27 | # offset: How much to add to times in this file so they align 28 | # with times in the other files 29 | # time: Time of the current line, adjusted by offset 30 | # suffix: Everything on the current line after the times 31 | files = [] 32 | 33 | # Earliest first timestamp from all the files. 34 | first = 0 35 | 36 | # Reference ghz (taken from input file with the earliest start time; 37 | # used for output). Used to compensate for the fact that different 38 | # traces may have assumed slightly different conversion rates from 39 | # ticks to microseconds. 40 | ghz = 0.0 41 | 42 | def next_line(info): 43 | """ 44 | Read information from a file. The info argument is one of the 45 | entries in files. 46 | """ 47 | while True: 48 | line = info["f"].readline() 49 | if not line: 50 | info["f"].close() 51 | info["f"] = None 52 | return 53 | match = re.match(' *([0-9.]+) us \(\+ *([0-9.]+) us\) (.*)', line) 54 | if not match: 55 | continue 56 | info["time"] = (float(match.group(1)) * ghz / info["ghz"]) + info["offset"] 57 | info["suffix"] = match.group(3).rstrip() 58 | return 59 | 60 | # Open each of the files and initialize information for them. 
61 | for file in sys.argv[1:]: 62 | f = open(file, newline='\n') 63 | line = f.readline() 64 | if not line: 65 | continue 66 | info = {"f": f} 67 | match = re.match(' *([0-9.]+) us \(\+ *([0-9.]+) us\) .* ' 68 | 'First event has timestamp ([0-9]+) ' 69 | '\(cpu_ghz ([0-9.]+)\)', line) 70 | if not match: 71 | continue 72 | info = {"name": file, 73 | "f": f, 74 | "ghz": float(match.group(4)), 75 | "first": int(match.group(3)), 76 | "offset": 0.0} 77 | files.append(info) 78 | 79 | # Find the earliest timestamp and set offsets. 80 | for info in files: 81 | if (first == 0) or info["first"] < first: 82 | first = info["first"] 83 | ghz = info["ghz"] 84 | for info in files: 85 | info["offset"] = ((info["first"] - first)/ghz)/1000.0 86 | # print("file %s has offset %.2f us (difference: %d)" % (info["name"], 87 | # info["offset"], info["first"] - first)) 88 | 89 | # Prime the info with the first real trace entry. 90 | next_line(info) 91 | 92 | # Repeatedly output the earliest line until there are no lines left to output. 
93 | prevTime = 0.0 94 | while True: 95 | best = None 96 | best_time = 0.0 97 | for info in files: 98 | if info["f"] and ((best_time == 0.0) or (info["time"] < best_time)): 99 | best_time = info["time"] 100 | best = info 101 | if not best: 102 | break 103 | time = best["time"] 104 | print("%9.3f us (+%8.3f us) %s" % (time, time - prevTime, best["suffix"])) 105 | prev_time = time 106 | next_line(best) -------------------------------------------------------------------------------- /util/ttoffset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019-2022 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | """ 7 | Rewrite a time trace with all of the times offset by a fixed amount 8 | (typically used to align the times in two timetraces) 9 | Usage: ttoffset.py old_time new_time [tt_file] 10 | 11 | The existing timetrace is in tt_file (or stdin in tt_file is omitted); a new 12 | timetrace will be written to standard output, with (new_time - old_time) 13 | added to each timestamp. 
14 | """ 15 | 16 | from __future__ import division, print_function 17 | from glob import glob 18 | from optparse import OptionParser 19 | import math 20 | import os 21 | import re 22 | import string 23 | import sys 24 | 25 | if len(sys.argv) == 4: 26 | f = open(sys.argv[3]) 27 | elif len(sys.argv) == 3: 28 | f = sys.stdin 29 | else: 30 | print("Usage: %s old_time new_time [tt_file]" % (sys.argv[0])) 31 | sys.exit(1) 32 | 33 | delta = float(sys.argv[2]) - float(sys.argv[1]) 34 | 35 | for line in f: 36 | match = re.match(' *([0-9.]+) us (.*)', line) 37 | if not match: 38 | print(line) 39 | continue 40 | time = float(match.group(1)) 41 | print("%9.3f us %s" % (time + delta, match.group(2))) -------------------------------------------------------------------------------- /util/ttprint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019-2022 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | """ 7 | This program reads timetrace information from /proc/timetrace (or from 8 | the first argument, if given) and prints it out in a different form, 9 | with times in microseconds instead of clock cycles. 10 | """ 11 | 12 | from __future__ import division, print_function 13 | from glob import glob 14 | from optparse import OptionParser 15 | import math 16 | import os 17 | import re 18 | import string 19 | import sys 20 | 21 | # Clock cycles per nanosecond. 22 | cpu_ghz = 0.0 23 | 24 | # Time in cycles of first event. 25 | first_time = 0 26 | 27 | # Time in cycles of previous event. 28 | prev_time = 0 29 | 30 | file_name = "/proc/timetrace" 31 | if len(sys.argv) > 1: 32 | file_name = sys.argv[1] 33 | f = open(file_name) 34 | 35 | # Read initial line containing clock rate. 
36 | line = f.readline() 37 | if not line: 38 | print('File empty!') 39 | exit(0) 40 | match = re.match('cpu_khz: ([0-9.]+)', line) 41 | if not match: 42 | print('Initial line doesn\'t contain clock rate:\n%s' % (line)) 43 | exit(1) 44 | cpu_ghz = float(match.group(1))*1e-06 45 | 46 | for line in f: 47 | match = re.match('([0-9.]+) (.+)', line) 48 | if not match: 49 | continue 50 | this_time = float(match.group(1)) 51 | this_event = match.group(2) 52 | if first_time == 0.0: 53 | first_time = this_time 54 | prev_time = this_time 55 | print('%9.3f us (+%8.3f us) [C00] First event has timestamp %s ' 56 | '(cpu_ghz %.15f)' % (0, 0, match.group(1), cpu_ghz)) 57 | print('%9.3f us (+%8.3f us) %s' % ( 58 | (this_time - first_time)/(1000.0 *cpu_ghz), 59 | (this_time - prev_time)/(1000.0 * cpu_ghz), this_event)) 60 | prev_time = this_time -------------------------------------------------------------------------------- /util/ttrange.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019-2022 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | """ 7 | Extract entries from a timetrace that fall within a particular time range. 8 | Usage: ttrange.py start_time end_time [tt_file] 9 | 10 | The existing timetrace is in tt_file (or stdin if tt_file is omitted); a new 11 | timetrace will be written to standard output containing all entries whose 12 | timestamps fall between start_time and end_time, inclusive.
13 | """ 14 | 15 | from __future__ import division, print_function 16 | from glob import glob 17 | from optparse import OptionParser 18 | import math 19 | import os 20 | import re 21 | import string 22 | import sys 23 | 24 | if len(sys.argv) == 4: 25 | f = open(sys.argv[3]) 26 | elif len(sys.argv) == 3: 27 | f = sys.stdin 28 | else: 29 | print("Usage: %s start_time end_time [tt_file]" % (sys.argv[0])) 30 | sys.exit(1) 31 | 32 | start_time = float(sys.argv[1]) 33 | end_time = float(sys.argv[2]) 34 | 35 | for line in f: 36 | match = re.match(' *([0-9.]+) us (.*)', line) 37 | if not match: 38 | continue 39 | time = float(match.group(1)) 40 | if (time >= start_time) and (time <= end_time): 41 | print(line.rstrip('\n')) -------------------------------------------------------------------------------- /util/ttsyslog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019-2022 Homa Developers 4 | # SPDX-License-Identifier: BSD-1-Clause 5 | 6 | """ 7 | This program reads timetrace information that was printk-ed to the 8 | system log, removing extraneous syslog information and printing it 9 | out with times in microseconds instead of clock cycles. 10 | 11 | Usage: 12 | ttsyslog.py [file] 13 | 14 | If no file is given, the information is read from standard input. 15 | """ 16 | 17 | from __future__ import division, print_function 18 | from glob import glob 19 | from optparse import OptionParser 20 | import math 21 | import os 22 | import re 23 | import string 24 | import sys 25 | 26 | # Clock cycles per nanosecond. 27 | cpu_ghz = None 28 | 29 | # Time in cycles of first event. 30 | first_time = 0 31 | 32 | # Time in cycles of previous event. 33 | prev_time = 0 34 | 35 | f = sys.stdin 36 | if len(sys.argv) > 1: 37 | f = open(sys.argv[1]) 38 | 39 | for line in f: 40 | # Ignore everything up until the initial line containing the clock speed. 
/* Copyright (c) 2019-2022 Homa Developers
 * SPDX-License-Identifier: BSD-1-Clause
 */

/* This program allocates a given amount of memory and then sleeps
 * forever. It is intended to create memory pressure in order to see
 * how other parts of the system react when memory runs low.
 *
 * Usage:
 * use_memory gbytes
 */

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Number of ints in each allocated block: roughly one gigabyte
 * (256,000,000 ints, i.e. 1.024 * 10^9 bytes when int is 4 bytes).
 */
enum { INTS_PER_GIG = 256000000 };

/**
 * main() - Allocate the requested number of blocks, write to every
 * element of each one, then sleep forever while holding the memory.
 * @argc:  Must be 2.
 * @argv:  argv[1] is the number of blocks ("gbytes") to allocate; it
 *         must be a positive decimal integer with no trailing characters.
 *
 * Return: Does not return on success (the process sleeps until killed).
 *         Exits with status 1 on a usage error, a bad argument, or an
 *         allocation failure.
 */
int main(int argc, char **argv) {
	long gbytes, i;
	char *end;

	if (argc != 2) {
		printf("Usage: %s gbytes\n", argv[0]);
		exit(1);
	}

	/* Reject empty input, trailing junk, out-of-range values, and
	 * non-positive counts (which would allocate nothing and then
	 * sleep forever).
	 */
	errno = 0;
	gbytes = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || errno == ERANGE || gbytes <= 0) {
		printf("Bad value %s; must be integer # of gbytes to allocate\n",
				argv[1]);
		exit(1);
	}

	/* Each iteration through the following loop allocates one block
	 * of INTS_PER_GIG ints and fills it with random values. The blocks
	 * are intentionally never freed: holding them is the whole point.
	 */
	for (i = 0; i < gbytes; i++) {
		int *block = malloc(INTS_PER_GIG * sizeof(*block));
		int j;

		if (block == NULL) {
			printf("Malloc returned NULL.\n");
			exit(1);
		}
		/* Writing every element means the whole block is actually
		 * touched, not merely reserved.
		 */
		for (j = 0; j < INTS_PER_GIG; j++) {
			block[j] = (int) random();
		}
		printf("Memory allocated: %ld gbytes\n", i + 1);

		/* The process never exits, so buffered progress output
		 * would be lost when stdout is a pipe or a file.
		 */
		fflush(stdout);
	}
	while (1) {
		sleep(1000);
	}
	return 0;
}