├── micro-bench ├── .gitignore └── gbench │ ├── Makefile │ └── gbench.c ├── Makefile ├── .gitignore ├── bin ├── sched-config ├── schedmon ├── energyprof ├── procmon ├── scmon ├── scinsight ├── mbench ├── energyinsight ├── ginsight ├── procinsight └── report ├── config └── MangoHud.conf ├── README.md └── LICENSE /micro-bench/.gitignore: -------------------------------------------------------------------------------- 1 | /schbench 2 | /gbench/gbench 3 | 4 | -------------------------------------------------------------------------------- /micro-bench/gbench/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -O1 -g -W -Wno-unused-parameter 3 | ALL_CFLAGS = $(CFLAGS) -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 4 | 5 | PROGS = gbench 6 | ALL = $(PROGS) 7 | 8 | $(PROGS): | depend 9 | 10 | all: $(ALL) 11 | 12 | %.o: %.c 13 | $(CC) -o $*.o -c $(ALL_CFLAGS) $< 14 | 15 | gbench: gbench.o 16 | $(CC) $(ALL_CFLAGS) -o $@ $(filter %.o,$^) -lpthread -lm 17 | 18 | depend: 19 | @$(CC) -MM $(ALL_CFLAGS) *.c 1> .depend 20 | 21 | clean: 22 | -rm -f *.o $(PROGS) .depend 23 | 24 | ifneq ($(wildcard .depend),) 25 | include .depend 26 | endif 27 | 28 | 29 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MK_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) 2 | INSTALL_DIR := $(MK_DIR)/bin 3 | 4 | all: schbench gbench 5 | 6 | micro-bench/schbench/schbench.c: 7 | (cd $(MK_DIR)/micro-bench/; git clone --depth 1 https://kernel.googlesource.com/pub/scm/linux/kernel/git/mason/schbench) 8 | 9 | schbench: micro-bench/schbench/schbench.c 10 | (cd $(MK_DIR)/micro-bench/schbench && make) 11 | cp $$(find $(MK_DIR)/micro-bench/schbench -type f -executable -print | grep -v '\.git/') $(INSTALL_DIR) 12 | 13 | gbench: micro-bench/gbench/gbench.c 14 | (cd $(MK_DIR)/micro-bench/gbench && make) 
15 | cp $$(find $(MK_DIR)/micro-bench/gbench -type f -executable -print) $(INSTALL_DIR) 16 | 17 | clean: 18 | (cd $(MK_DIR)/micro-bench/gbench && make clean) 19 | (cd $(MK_DIR)/micro-bench/schbench && make clean && rm -rf $(MK_DIR)/micro-bench/schbench) 20 | 21 | .PHONY: all schbench gbench clean 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bin/schbench 2 | /bin/gbench 3 | /bin/schedstat.py 4 | /logs* 5 | .depend 6 | *.o 7 | 8 | # http://www.gnu.org/software/automake 9 | 10 | Makefile.in 11 | /ar-lib 12 | /mdate-sh 13 | /py-compile 14 | /test-driver 15 | /ylwrap 16 | .deps/ 17 | .dirstamp 18 | 19 | # http://www.gnu.org/software/autoconf 20 | 21 | autom4te.cache 22 | /autoscan.log 23 | /autoscan-*.log 24 | /aclocal.m4 25 | /compile 26 | /config.cache 27 | /config.guess 28 | /config.h.in 29 | /config.log 30 | /config.status 31 | /config.sub 32 | /configure 33 | /configure.scan 34 | /depcomp 35 | /install-sh 36 | /missing 37 | /stamp-h1 38 | 39 | # https://www.gnu.org/software/libtool/ 40 | 41 | /ltmain.sh 42 | 43 | # http://www.gnu.org/software/texinfo 44 | 45 | /texinfo.tex 46 | 47 | # http://www.gnu.org/software/m4/ 48 | 49 | m4/libtool.m4 50 | m4/ltoptions.m4 51 | m4/ltsugar.m4 52 | m4/ltversion.m4 53 | m4/lt~obsolete.m4 54 | 55 | # Generated Makefile 56 | # (meta build system like autotools, 57 | # can automatically generate from config.status script 58 | # (which is called by configure script)) 59 | Makefile 60 | -------------------------------------------------------------------------------- /bin/sched-config: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import csv 4 | import argparse 5 | 6 | def sched_config_set(config_csv): 7 | with open(config_csv, "r") as cf: 8 | rd = csv.reader(cf) 9 | rd.__next__() 10 | for (k, v) in rd: 11 | k = 
k.strip() 12 | v = v.strip() 13 | with open(k, "w") as kf: 14 | print(v, file = kf) 15 | 16 | def sched_config_get(config_csv): 17 | params = ["/proc/sys/kernel/sched_cfs_bandwidth_slice_us", 18 | "/sys/kernel/debug/sched/latency_ns", 19 | "/sys/kernel/debug/sched/min_granularity_ns", 20 | "/sys/kernel/debug/sched/wakeup_granularity_ns", 21 | "/sys/kernel/debug/sched/migration_cost_ns", 22 | "/sys/kernel/debug/sched/nr_migrate"] 23 | 24 | with open(config_csv, "w") as cf: 25 | print("%s, %s" % ("{0:^50}".format("attribute"), 26 | "{0:^20}".format("value")), file = cf) 27 | for p in params: 28 | with open(p, "r") as f: 29 | l = f.readline() 30 | v = l.strip() 31 | print("%s, %s" % ("{0:<50}".format(p), 32 | "{0:>20}".format(v)), file = cf) 33 | 34 | def sched_config(args): 35 | if args.get != None: 36 | sched_config_get(args.get) 37 | elif args.set != None: 38 | sched_config_set(args.set) 39 | 40 | def get_cmd_options(argv): 41 | parser = argparse.ArgumentParser( 42 | prog = "sched-config", 43 | description = "Set or get the scheduler config parameters") 44 | parser.add_argument('-g', '--get', action='store', 45 | help='Get the scheculer parameters') 46 | parser.add_argument('-s', '--set', action='store', 47 | help='Set the scheculer parameters') 48 | args = parser.parse_args(argv) 49 | 50 | # sanity check of arguments 51 | nopts = (0 if args.get == None else 1) + \ 52 | (0 if args.set == None else 1) 53 | if nopts != 1: 54 | parser.print_help() 55 | print("sched-config: error: either '-g' or '-s' should be specified", 56 | file = sys.stderr) 57 | exit(1) 58 | return args 59 | 60 | 61 | if __name__ == "__main__": 62 | args = get_cmd_options(sys.argv[1:]) 63 | sched_config(args) 64 | 65 | """ 66 | attribute , value 67 | /proc/sys/kernel/sched_cfs_bandwidth_slice_us , 3000 68 | /sys/kernel/debug/sched/latency_ns , 3000000 69 | /sys/kernel/debug/sched/min_granularity_ns , 300000 70 | /sys/kernel/debug/sched/wakeup_granularity_ns , 500000 71 | 
/sys/kernel/debug/sched/migration_cost_ns , 50000 72 | /sys/kernel/debug/sched/nr_migrate , 128 73 | """ 74 | -------------------------------------------------------------------------------- /bin/schedmon: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | mon_procs = [] 11 | 12 | def ignore_term_signals(): 13 | term_signals = (signal.SIGTERM, signal.SIGINT, signal.SIGABRT, 14 | signal.SIGBUS, signal.SIGILL, signal.SIGSEGV, 15 | signal.SIGHUP) 16 | for s in term_signals: 17 | # do nothing upon kill signals for graceful exit 18 | signal.signal(s, lambda signum, frame: None) 19 | 20 | def get_log_name(args, kind): 21 | log = os.path.join(args.outdir, 22 | args.log + "-schedmon-" + kind+ "__.log") 23 | return log 24 | 25 | def run_schedmon(args): 26 | # prep for gracefil termination 27 | ignore_term_signals() 28 | 29 | # prep for logging 30 | subprocess.Popen("mkdir -p " + args.outdir, 31 | shell=True, stdout=None, stderr=None).wait() 32 | outdir = args.outdir 33 | 34 | # launch a background monitor 35 | log = get_log_name(args, "raw") 36 | sh_cmd = "perf sched record -ag -o %s sleep 36500d" % log 37 | p = subprocess.Popen(sh_cmd, shell=True) 38 | mon_procs.append(p) 39 | 40 | def wait_for_schedmon(args): 41 | # wait for the background processes 42 | for p in mon_procs: 43 | p.wait() 44 | 45 | # prep comand line for report generation 46 | sh_cmds = [] 47 | raw_log = get_log_name(args, "raw") 48 | # - latency 49 | log = get_log_name(args, "latency") 50 | sh_cmd = "perf sched latency -i %s > %s" % (raw_log, log) 51 | sh_cmds.append(sh_cmd) 52 | # - map 53 | log = get_log_name(args, "map") 54 | sh_cmd = "perf sched map -i %s > %s" % (raw_log, log) 55 | sh_cmds.append(sh_cmd) 56 | # - timehist (full) 57 | log = get_log_name(args, "timehist_full") 58 | sh_cmd = "perf sched timehist 
-SMVwng -i %s > %s" % (raw_log, log) 59 | sh_cmds.append(sh_cmd) 60 | # - timehist (short) 61 | # log = get_log_name(args, "timehist_short") 62 | # sh_cmd = "perf sched timehist -Sg -i %s > %s" % (raw_log, log) 63 | # sh_cmds.append(sh_cmd) 64 | 65 | # launch background monitors 66 | for sh_cmd in sh_cmds: 67 | p = subprocess.Popen(sh_cmd, shell=True) 68 | mon_procs.append(p) 69 | p.wait() 70 | 71 | 72 | def get_cmd_options(argv): 73 | parser = argparse.ArgumentParser( 74 | prog = "schedmon", 75 | description = "Collect the detailed scheduler activity internally using `perf sched`") 76 | parser.add_argument('-o', '--outdir', action='store', required=True, 77 | help='output directory') 78 | parser.add_argument('-l', '--log', action='store', required=True, 79 | help='log file prefix') 80 | args = parser.parse_args(argv) 81 | 82 | return args 83 | 84 | if __name__ == "__main__": 85 | args = get_cmd_options(sys.argv[1:]) 86 | run_schedmon(args) 87 | wait_for_schedmon(args) 88 | 89 | -------------------------------------------------------------------------------- /bin/energyprof: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | bg_proc = None 11 | 12 | def ignore_term_signals(): 13 | term_signals = (signal.SIGTERM, signal.SIGINT, signal.SIGABRT, 14 | signal.SIGBUS, signal.SIGILL, signal.SIGSEGV, 15 | signal.SIGHUP) 16 | for s in term_signals: 17 | # do nothing upon kill signals for graceful exit 18 | signal.signal(s, lambda signum, frame: None) 19 | 20 | def get_log_name(args): 21 | log = os.path.join(args.outdir, args.log + "-energyprof__.log") 22 | return log 23 | 24 | def get_cpu_configs(args): 25 | configs = {8: ["0", "0-1", "0,4", "0-3", "0,1,4,5", "0-7"], 26 | 16: ["0", "0-1", "0,8", "0-3", "0,1,7,8", "0-7", "0-3,8-11", "0-15"], } 27 | return configs[args.num_cpus] 28 | 29 | 
def chcpu(args, config, f): 30 | subprocess.Popen("chcpu -d 1-" + str(args.num_cpus), shell=True, stdout=f, stderr=f).wait() 31 | subprocess.Popen("chcpu -e " + config, shell=True, stdout=f, stderr=f).wait() 32 | subprocess.Popen("sleep 1", shell=True).wait() 33 | 34 | def print_config(args, config, load, f): 35 | fds = [f, sys.__stdout__] 36 | for fd in fds: 37 | print("## cpu=" + config + ", load=" + str(load) + 38 | ",util=" + str(load * args.num_cpus), file=fd) 39 | 40 | def run_ubench(args, config, load, f): 41 | cmd = "perf stat -a --per-socket -e power/energy-pkg/ " + \ 42 | "stress-ng --change-cpu --no-rand-seed " + \ 43 | "--taskset " + config + " --all " + str(args.num_cpus) + \ 44 | " --cpu " + str(args.num_cpus) + " --cpu-method all " + \ 45 | " --cpu-load " + str(load) + " --cpu-load-slice 3 " + \ 46 | " --metrics -t " + str(args.time_sec) 47 | subprocess.Popen(cmd, shell=True, stdout=f, stderr=f).wait() 48 | 49 | def run_energyprof(args): 50 | # prep for gracefil termination 51 | ignore_term_signals() 52 | 53 | # prep for logging 54 | subprocess.Popen("mkdir -p " + args.outdir, 55 | shell=True, stdout=None, stderr=None).wait() 56 | outdir = args.outdir 57 | log = get_log_name(args) 58 | with open(log, 'w') as f: 59 | # turn on turbostat as a background process 60 | global bg_proc 61 | bg_proc = subprocess.Popen( 62 | "taskset -c 0 turbostat --header_iterations 1 -S", 63 | shell=True, stdout=f, stderr=f) 64 | subprocess.Popen("sleep 5", shell=True).wait() 65 | 66 | # for each CPU set 67 | for config in get_cpu_configs(args): 68 | # make only target CPUs online 69 | chcpu(args, config, f) 70 | 71 | # for low load settings 72 | for load in range(1, 7): 73 | print_config(args, config, load, f) 74 | run_ubench(args, config, load, f) 75 | 76 | # for high load settings 77 | for load in range(12, 101, 6): 78 | print_config(args, config, load, f) 79 | run_ubench(args, config, load, f) 80 | 81 | wait_for_energyprof(args, f) 82 | 83 | 84 | def 
wait_for_energyprof(args, f): 85 | # stop the turbostat 86 | subprocess.Popen("pkill turbostat", shell=True).wait() 87 | global bg_proc 88 | bg_proc.wait() 89 | 90 | # activate all CPUs 91 | chcpu(args, "0-" + str(args.num_cpus - 1), f) 92 | 93 | def get_cmd_options(argv): 94 | parser = argparse.ArgumentParser( 95 | prog = "energyprof", 96 | description = "Collect energy and performance statistics according to CPU load", 97 | epilog = "energyprof internally uses 'turbostat', 'stress-ng', 'chcpu', 'taskset', and 'perf'.") 98 | parser.add_argument('-c', '--num_cpus', action='store', type=int, 99 | required=True, help='number of CPUs of this machine') 100 | parser.add_argument('-t', '--time_sec', action='store', type=int, 101 | default=120, help='time in sec to run stress-_ng') 102 | parser.add_argument('-o', '--outdir', action='store', required=True, 103 | help='output directory') 104 | parser.add_argument('-l', '--log', action='store', required=True, 105 | help='log file prefix') 106 | 107 | args = parser.parse_args(argv) 108 | return args 109 | 110 | if __name__ == "__main__": 111 | args = get_cmd_options(sys.argv[1:]) 112 | run_energyprof(args) 113 | -------------------------------------------------------------------------------- /bin/procmon: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | mon_procs = [] 11 | 12 | def ignore_term_signals(): 13 | term_signals = (signal.SIGTERM, signal.SIGINT, signal.SIGABRT, 14 | signal.SIGBUS, signal.SIGILL, signal.SIGSEGV, 15 | signal.SIGHUP) 16 | for s in term_signals: 17 | # do nothing upon kill signals for graceful exit 18 | signal.signal(s, lambda signum, frame: None) 19 | 20 | def get_log_name(args, mon): 21 | log = os.path.join(args.outdir, 22 | args.log + "-procmon-" + mon + "__.log") 23 | return log 24 | 25 | def run_procmons(args): 
26 | # prep for gracefil termination 27 | ignore_term_signals() 28 | 29 | # prep for logging 30 | subprocess.Popen("mkdir -p " + args.outdir, 31 | shell=True, stdout=None, stderr=None).wait() 32 | outdir = args.outdir 33 | 34 | # prep comand line for each background monitor 35 | sh_cmds = [] 36 | # -s, --sched 37 | if args.sched: 38 | log = get_log_name(args, "sched") 39 | dat = log + ".dat" 40 | sh_cmd = "trace-cmd record -e sched_wakeup -o %s > /dev/null" % dat 41 | sh_cmds.append(sh_cmd) 42 | # -c, --cstate 43 | if args.cstate: 44 | log = get_log_name(args, "cstate") 45 | sh_cmd = "cpupower monitor sleep 36500d > " + log 46 | sh_cmds.append(sh_cmd) 47 | # -e, --energy 48 | if args.energy: 49 | log = get_log_name(args, "energy") 50 | sh_cmd = "perf stat -a --per-socket -e power/energy-pkg/ 2>" + log 51 | sh_cmds.append(sh_cmd) 52 | # -p, --perf 53 | if args.perf: 54 | log = get_log_name(args, "perf") 55 | sh_cmd = "perf stat -a 2>" + log 56 | sh_cmds.append(sh_cmd) 57 | 58 | # launch background monitors 59 | for sh_cmd in sh_cmds: 60 | p = subprocess.Popen(sh_cmd, shell=True) 61 | mon_procs.append(p) 62 | 63 | def wait_for_procmons(args): 64 | # wait for the background processes 65 | for p in mon_procs: 66 | p.wait() 67 | 68 | # if '--sched' is on, generates a report using 'trace-cmd' 69 | if args.sched: 70 | log = get_log_name(args, "sched") 71 | dat = log + ".dat" 72 | sh_cmd = "trace-cmd report -i %s > %s" % (dat, log) 73 | p = subprocess.Popen(sh_cmd, shell=True) 74 | p.wait() 75 | 76 | def get_cmd_options(argv): 77 | parser = argparse.ArgumentParser( 78 | prog = "procmon", 79 | description = "Collect CPU statistics and system-wide scheduling statistics", 80 | epilog = "procmon internally uses 'trace-cmd', 'cpupower', and 'perf'.") 81 | parser.add_argument('-o', '--outdir', action='store', required=True, 82 | help='output directory') 83 | parser.add_argument('-l', '--log', action='store', required=True, 84 | help='log file prefix') 85 | 86 | 
parser.add_argument('-s', '--sched', action='store_true', 87 | help='trace wake-up events of process scheduler') 88 | parser.add_argument('-c', '--cstate', action='store_true', 89 | help='trace c-state of all CPUs') 90 | parser.add_argument('-e', '--energy', action='store_true', 91 | help='trace energy consumption of all CPUs') 92 | parser.add_argument('-p', '--perf', action='store_true', 93 | help='trace performance statistics of all CPUs') 94 | 95 | parser.add_argument('-a', '--all', action='store_true', 96 | help='trace all statistics') 97 | args = parser.parse_args(argv) 98 | 99 | # sanity check of arguments 100 | if args.all: 101 | (args.sched, args.cstate, args.energy, args.perf) = \ 102 | (True, True, True, True) 103 | nopts = (0 if args.sched == None else 1) + \ 104 | (0 if args.cstate == None else 1) + \ 105 | (0 if args.energy == None else 1) + \ 106 | (0 if args.perf == None else 1) 107 | if nopts == 0: 108 | parser.print_help() 109 | print("procmon: error: at least one out of '-s', '-c', `-e`, or '-p'" \ 110 | "should be specified", file = sys.stderr) 111 | exit(1) 112 | return args 113 | 114 | if __name__ == "__main__": 115 | args = get_cmd_options(sys.argv[1:]) 116 | run_procmons(args) 117 | wait_for_procmons(args) 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /bin/scmon: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | def pidof(pname): 11 | pids = [] 12 | for p in psutil.process_iter(): 13 | try: 14 | pinfo = p.as_dict(attrs=['pid', 'name']) 15 | if (pinfo['name'] == pname): 16 | pids.append( pinfo['pid'] ) 17 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 18 | pass 19 | return pids 20 | 21 | def pstree(pid): 22 | try: 23 | children = psutil.Process(pid).children(True) 24 | 
pids = map(lambda c: c.as_dict()['pid'], children) 25 | return list(pids) 26 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 27 | return [] 28 | 29 | def strace_pids(pids, odir, log): 30 | sh_cmd = "strace -o {log_path} -ff -p {pids}".format( 31 | log_path=os.path.join(odir, log), 32 | pids=",".join(str(pid) for pid in pids)) 33 | p = subprocess.Popen(sh_cmd, shell=True, stdout=None, stderr=None) 34 | p.wait() 35 | return p 36 | 37 | def strace_cmd(cmd, odir, log): 38 | sh_cmd = ["strace", "-o", os.path.join(odir, log), "-ff"] + cmd 39 | p = subprocess.Popen(sh_cmd) 40 | p.wait() 41 | return p 42 | 43 | def ignore_term_signals(): 44 | term_signals = (signal.SIGTERM, signal.SIGINT, signal.SIGABRT, 45 | signal.SIGBUS, signal.SIGILL, signal.SIGSEGV, 46 | signal.SIGHUP) 47 | for s in term_signals: 48 | # do nothing upon kill signals for graceful exit 49 | signal.signal(s, lambda signum, frame: None) 50 | 51 | def run_syscall_trace(args): 52 | # prep for gracefil termination 53 | ignore_term_signals() 54 | 55 | # prep for logging 56 | log = args.log + "-scmon" 57 | subprocess.Popen("mkdir -p " + args.outdir, 58 | shell=True, stdout=None, stderr=None).wait() 59 | outdir = args.outdir 60 | 61 | # strace with a command 62 | if args.cmd: 63 | strace_cmd(args.cmd, outdir, log) 64 | return 65 | 66 | # strace with process id(s) 67 | # -p: strace of pid 68 | if args.pid: 69 | pids = [args.pid] 70 | # -r: strace of pid and all its decendents 71 | elif args.root: 72 | pids = pstree(args.root) 73 | # -n: strace of a process with 'name' and all its decendents 74 | elif args.name: 75 | ps = pidof(args.name) 76 | if ps == []: 77 | print("scmon: error: %s does not exists" % args.name, 78 | file = sys.stderr) 79 | exit(1) 80 | pids = [] 81 | for p in ps: 82 | pids = pids + pstree(p) 83 | strace_pids(pids, outdir, log) 84 | 85 | def get_cmd_options(argv): 86 | parser = argparse.ArgumentParser( 87 | prog = "scmon", 88 | description = "Collect system call usage 
statistics of a program", 89 | epilog = "For example, 'scmon -o log -l steam -n steam' to log " \ 90 | "the system call usage of 'steam' and all its decendents " \ 91 | "under log/steam*-scmon*.") 92 | parser.add_argument('-o', '--outdir', action='store', required=True, 93 | help='output directory') 94 | parser.add_argument('-l', '--log', action='store', required=True, 95 | help='log file prefix' ) 96 | parser.add_argument('-p', '--pid', action='store', type=int, 97 | help='process id to monitor') 98 | parser.add_argument('-r', '--root', action='store', type=int, 99 | help='root process id to monitor ' \ 100 | '(all decendents will be monitored)') 101 | parser.add_argument('-n', '--name', action='store', 102 | help='name of a process to monitor') 103 | parser.add_argument('-c', '--cmd', action='store', nargs='+', 104 | help='command to execute') 105 | 106 | args = parser.parse_args(argv) 107 | 108 | # check if only one of -p, -r, or -c is specified 109 | nprogs = (0 if args.pid == None else 1) + \ 110 | (0 if args.root == None else 1) + \ 111 | (0 if args.name== None else 1) + \ 112 | (0 if args.cmd == None else 1) 113 | if nprogs != 1: 114 | parser.print_help() 115 | print("scmon: error: only one out of '-p', '-r', `-n`, or '-c'" \ 116 | "should be specified", file = sys.stderr) 117 | exit(1) 118 | return args 119 | 120 | if __name__ == "__main__": 121 | args = get_cmd_options(sys.argv[1:]) 122 | 123 | run_syscall_trace(args) 124 | 125 | -------------------------------------------------------------------------------- /bin/scinsight: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import zlib 8 | import matplotlib.pyplot as plt 9 | import argparse 10 | 11 | def get_syscall_name(line): 12 | at = line.find('(') 13 | syscall = line[0:at] 14 | if at == -1 or syscall.isalnum() == False: 15 | return None 16 | # further 
classify futex into futex wait and wake. 17 | if syscall == "futex": 18 | if line.find("FUTEX_WAIT") > 0: 19 | syscall = "futex::wait" 20 | elif line.find("FUTEX_WAKE") > 0: 21 | if line.find(") = 0") > 0: 22 | syscall = "futex::wake:non-zero" 23 | else: 24 | syscall = "futex::wake:zero" 25 | else: 26 | syscall = "futex::other" 27 | return syscall 28 | 29 | def get_strace_names(odir, log): 30 | prefix = os.path.join(odir, log) 31 | # 32 | # Match only prefix-scmon.* files. 33 | # 1. /path/to/prefix-scmon* 34 | # 35 | # Treat /path/to/prefix as a directory, and match any *-scmon.* files 36 | # inside of it. 37 | # 2. /path/to/prefix/*-scmon.* 38 | # 39 | logs = prefix + '-scmon.*' 40 | if not glob.glob(logs): 41 | logs = os.path.join(prefix, '*-scmon.*') 42 | return logs 43 | 44 | def syscall_stat(odir, log): 45 | stat = {} 46 | logs = get_strace_names(odir, log) 47 | for log in glob.glob(logs): 48 | with open(log, 'r') as f: 49 | for line in f: 50 | syscall = get_syscall_name(line) 51 | if syscall != None: 52 | stat[syscall] = stat.get(syscall, 0) + 1 53 | 54 | total = float(sum(stat.values())) 55 | scr_list = [(s, c, float(c)/total * 100.0) for s, c in stat.items()] 56 | scr_list.sort(key=lambda x: x[1], reverse=True) 57 | return scr_list 58 | 59 | def get_log_name(args, out): 60 | log = os.path.join(args.outdir, args.log) + "-scinsight-stat." 
+ out 61 | return log 62 | 63 | def hash_rgb_from_str(s): 64 | h = zlib.crc32( bytes(s, 'utf-8') ) 65 | r = (h & 0x00FF00) >> 8 66 | g = (h & 0xFF0000) >> 16 67 | b = (h & 0x0000FF) >> 0 68 | return (float(r)/0xFF, float(g)/0xFF, float(b)/0xFF) 69 | 70 | def reset_plot(): 71 | plt.clf() 72 | plt.style.use('default') 73 | plt.rcParams['font.size'] = 7 74 | 75 | def gen_pie_chart(args, scr_list): 76 | # unzip stat 77 | sys_list, cnt_list, ratio_list = list(zip(*scr_list)) 78 | # label for each syscall 79 | label_list = list( map(lambda t: "%s (%.2f%s)" % (t[0], t[1], "%"), 80 | zip(sys_list, ratio_list)) ) 81 | # assign a color per syscall 82 | rgb_list = list( map(lambda s: hash_rgb_from_str(s), sys_list) ) 83 | 84 | # clear canvas 85 | reset_plot() 86 | fig, ax = plt.subplots(figsize=(3.5,3)) 87 | ax.pie(cnt_list, labels=label_list, colors=rgb_list) 88 | fig_name = get_log_name(args, "svg") 89 | plt.savefig(fig_name) 90 | plt.close() 91 | 92 | def report_syscall_stat_in_csv(scr_list, f): 93 | print("%s, %s, %s" % ("{0:^20}".format("syscall"), 94 | "{0:^20}".format("count"), 95 | "{0:^20}".format("ratio (%)")), file = f) 96 | 97 | for s, c, r in scr_list: 98 | print("%s, %s, %s" % ("{0:<20}".format(s), 99 | "{0:>20}".format(c), 100 | "{0:>20}".format("%.4f" % r)), file = f) 101 | 102 | def report_syscall_stat(args): 103 | # collect stat 104 | scr_list = syscall_stat(args.outdir, args.log) 105 | # generate a pie chart in svg 106 | gen_pie_chart(args, scr_list) 107 | # report in csv 108 | with open(get_log_name(args, "csv"), "w") as f: 109 | report_syscall_stat_in_csv(scr_list, f) 110 | if args.quiet == False: 111 | report_syscall_stat_in_csv(scr_list, sys.stdout) 112 | 113 | def get_cmd_options(argv): 114 | parser = argparse.ArgumentParser( 115 | prog = "scinsight", 116 | description = "Report system call usage statistics of a program",) 117 | parser.add_argument('-o', '--outdir', action='store', required=True, 118 | help='output directory') 119 | 
parser.add_argument('-l', '--log', action='store', required=True, 120 | help='log file prefix, or path to directory containing log files') 121 | parser.add_argument('-q', '--quiet', action='store_true', 122 | help='do not print result to stdout' ) 123 | 124 | args = parser.parse_args(argv) 125 | return args 126 | 127 | if __name__ == "__main__": 128 | args = get_cmd_options(sys.argv[1:]) 129 | report_syscall_stat(args) 130 | 131 | 132 | -------------------------------------------------------------------------------- /config/MangoHud.conf: -------------------------------------------------------------------------------- 1 | ### MangoHud configuration file 2 | ### Uncomment any options you wish to enable. Default options are left uncommented 3 | ### Use some_parameter=0 to disable a parameter (only works with on/off parameters) 4 | ### Everything below can be used / overridden with the environment variable MANGOHUD_CONFIG instead 5 | 6 | ################ PERFORMANCE ################# 7 | 8 | ### Limit the application FPS. Comma-separated list of one or more FPS values (e.g. 0,30,60). 0 means unlimited (unless VSynced) 9 | # fps_limit= 10 | 11 | ### VSync [0-3] 0 = adaptive; 1 = off; 2 = mailbox; 3 = on 12 | # vsync= 13 | 14 | ### OpenGL VSync [0-N] 0 = off; >=1 = wait for N v-blanks, N > 1 acts as a FPS limiter (FPS = display refresh rate / N) 15 | # gl_vsync= 16 | 17 | ################### VISUAL ################### 18 | 19 | ### Legacy layout 20 | # legacy_layout=false 21 | 22 | ### Display custom centered text, useful for a header 23 | # custom_text_center= 24 | 25 | ### Display the current system time 26 | # time 27 | 28 | ### Time formatting examples 29 | # time_format=%H:%M 30 | # time_format=[ %T %F ] 31 | # time_format=%X # locally formatted time, because of limited glyph range, missing characters may show as '?' (e.g. 
Japanese) 32 | 33 | ### Display MangoHud version 34 | # version 35 | 36 | ### Display the current GPU information 37 | ## Note: gpu_mem_clock also needs "vram" to be enabled 38 | gpu_stats 39 | # gpu_temp 40 | # gpu_core_clock 41 | # gpu_mem_clock 42 | # gpu_power 43 | # gpu_text=GPU 44 | # gpu_load_change 45 | # gpu_load_value=60,90 46 | # gpu_load_color=39F900,FDFD09,B22222 47 | 48 | ### Display the current CPU information 49 | cpu_stats 50 | # cpu_temp 51 | # cpu_power 52 | # cpu_text=CPU 53 | # cpu_mhz 54 | # cpu_load_change 55 | # cpu_load_value=60,90 56 | # cpu_load_color=39F900,FDFD09,B22222 57 | 58 | ### Display the current CPU load & frequency for each core 59 | # core_load 60 | # core_load_change 61 | 62 | ### Display IO read and write for the app (not system) 63 | # io_stats 64 | # io_read 65 | # io_write 66 | 67 | ### Display system vram / ram / swap space usage 68 | # vram 69 | ram 70 | swap 71 | 72 | ### Display per process memory usage 73 | ## Show resident memory and other types, if enabled 74 | # procmem 75 | # procmem_shared 76 | # procmem_virt 77 | 78 | ### Display battery information 79 | # battery 80 | # battery_icon 81 | # gamepad_battery 82 | # gamepad_battery_icon 83 | 84 | ### Display FPS and frametime 85 | fps 86 | # fps_sampling_period=500 87 | # fps_color_change 88 | # fps_value=30,60 89 | # fps_color=B22222,FDFD09,39F900 90 | frametime 91 | # frame_count 92 | 93 | ### Display miscellaneous information 94 | # engine_version 95 | # gpu_name 96 | # vulkan_driver 97 | # wine 98 | 99 | ### Display loaded MangoHud architecture 100 | # arch 101 | 102 | ### Display the frametime line graph 103 | frame_timing 104 | # histogram 105 | 106 | ### Display GameMode / vkBasalt running status 107 | # gamemode 108 | # vkbasalt 109 | 110 | ### Display current FPS limit 111 | # show_fps_limit 112 | 113 | ### Display the current resolution 114 | # resolution 115 | 116 | ### Display custom text 117 | # custom_text= 118 | ### Display output of Bash command in 
next column 119 | # exec= 120 | 121 | ### Display media player metadata 122 | # media_player 123 | # media_player_name=spotify 124 | ## Format metadata, lines are delimited by ; (wip) 125 | # media_player_format={title};{artist};{album} 126 | # media_player_format=Track:;{title};By:;{artist};From:;{album} 127 | 128 | ### Change the hud font size 129 | # font_size=24 130 | # font_scale=1.0 131 | # font_size_text=24 132 | # font_scale_media_player=0.55 133 | # no_small_font 134 | 135 | ### Change default font (set location to TTF/OTF file) 136 | ## Set font for the whole hud 137 | # font_file= 138 | 139 | ## Set font only for text like media player metadata 140 | # font_file_text= 141 | 142 | ## Set font glyph ranges. Defaults to Latin-only. Don't forget to set font_file/font_file_text to font that supports these 143 | ## Probably don't enable all at once because of memory usage and hardware limits concerns 144 | ## If you experience crashes or text is just squares, reduce glyph range or reduce font size 145 | # font_glyph_ranges=korean,chinese,chinese_simplified,japanese,cyrillic,thai,vietnamese,latin_ext_a,latin_ext_b 146 | 147 | ### Change the hud position 148 | # position=top-left 149 | 150 | ### Change the corner roundness 151 | # round_corners= 152 | 153 | ### Disable / hide the hud by default 154 | # no_display 155 | 156 | ### Hud position offset 157 | # offset_x= 158 | # offset_y= 159 | 160 | ### Hud dimensions 161 | # width= 162 | # height= 163 | # table_columns= 164 | # cellpadding_y= 165 | 166 | ### Hud transparency / alpha 167 | # background_alpha=0.5 168 | # alpha= 169 | 170 | ### FCAT overlay 171 | ### This enables an FCAT overlay to perform frametime analysis on the final image stream. 172 | ### Enable the overlay 173 | # fcat 174 | ### Set the width of the FCAT overlay. 175 | ### 24 is a performance optimization on AMD GPUs that should not have adverse effects on nVidia GPUs. 176 | ### A minimum of 20 pixels is recommended by nVidia. 
177 | # fcat_overlay_width=24 178 | ### Set the screen edge, this can be useful for special displays that don't update from top edge to bottom. This goes from 0 (left side) to 3 (top edge), counter-clockwise. 179 | # fcat_screen_edge=0 180 | 181 | ### Color customization 182 | # text_color=FFFFFF 183 | # gpu_color=2E9762 184 | # cpu_color=2E97CB 185 | # vram_color=AD64C1 186 | # ram_color=C26693 187 | # engine_color=EB5B5B 188 | # io_color=A491D3 189 | # frametime_color=00FF00 190 | # background_color=020202 191 | # media_player_color=FFFFFF 192 | # wine_color=EB5B5B 193 | # battery_color=FF9078 194 | 195 | ### Specify GPU with PCI bus ID for AMDGPU and NVML stats 196 | ### Set to 'domain:bus:slot.function' 197 | # pci_dev=0:0a:0.0 198 | 199 | ### Blacklist 200 | # blacklist= 201 | 202 | ### Control over socket 203 | ### Enable and set socket name, '%p' is replaced with process id 204 | # control = mangohud 205 | # control = mangohud-%p 206 | 207 | ################ WORKAROUNDS ################# 208 | ### Options starting with "gl_*" are for OpenGL 209 | ### Specify what to use for getting display size. Options are "viewport", "scissorbox" or disabled. Defaults to using glXQueryDrawable 210 | # gl_size_query=viewport 211 | 212 | ### (Re)bind given framebuffer before MangoHud gets drawn. Helps with Crusader Kings III 213 | # gl_bind_framebuffer=0 214 | 215 | ### Don't swap origin if using GL_UPPER_LEFT. 
Helps with Ryujinx 216 | # gl_dont_flip=1 217 | 218 | ################ INTERACTION ################# 219 | 220 | ### Change toggle keybinds for the hud & logging 221 | # toggle_hud=Shift_R+F12 222 | # toggle_fps_limit=Shift_L+F1 223 | # toggle_logging=Shift_L+F2 224 | # reload_cfg=Shift_L+F4 225 | # upload_log=Shift_L+F3 226 | 227 | #################### LOG ##################### 228 | ### Automatically start the log after X seconds 229 | # autostart_log=1 230 | ### Set amount of time in seconds that the logging will run for 231 | # log_duration= 232 | ### Change the default log interval, 100 is default 233 | log_interval=0 234 | ### Set location of the output files (required for logging) 235 | output_folder=/home/deck/mangologs-vapormark 236 | ### Permit uploading logs directly to FlightlessMango.com 237 | # permit_upload=1 238 | ### Define a '+'-separated list of percentiles shown in the benchmark results 239 | ### Use "AVG" to get a mean average. Default percentiles are 97+AVG+1+0.1 240 | # benchmark_percentiles=97,AVG,1,0.1 241 | -------------------------------------------------------------------------------- /bin/mbench: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | cur_dir = os.path.dirname(__file__) 11 | running_tasks = [] 12 | 13 | class color: 14 | HEADER = '\033[95m' 15 | BLUE = '\033[94m' 16 | GREEN = '\033[92m' 17 | WARNING = '\033[93m' 18 | FAIL = '\033[91m' 19 | ENDC = '\033[0m' 20 | BOLD = '\033[1m' 21 | UNDERLINE = '\033[4m' 22 | 23 | def print_log(msg): 24 | print(color.BLUE + "# [mbench] " + msg + color.ENDC) 25 | 26 | def print_warning(msg): 27 | print(color.FAIL + "# [mbench] " + msg + color.ENDC) 28 | 29 | def ignore_term_signals(): 30 | term_signals = (signal.SIGTERM, signal.SIGINT, signal.SIGABRT, 31 | signal.SIGBUS, signal.SIGILL, signal.SIGSEGV, 32 | 
signal.SIGHUP) 33 | for s in term_signals: 34 | # do nothing upon kill signals for graceful exit 35 | signal.signal(s, lambda signum, frame: None) 36 | 37 | def kill_all(parent): 38 | # NOTE 39 | # - SIGINT should be sent since some programs (e.g., trace-cmd) terminate 40 | # gracefully only for SIGINT (Ctrl-C) 41 | 42 | # kill all the process tree of parent 43 | children = psutil.Process(parent.pid).children(True) 44 | for c in children: 45 | try: 46 | c.send_signal(signal.SIGINT) 47 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 48 | pass 49 | try: 50 | parent.send_signal(signal.SIGINT) 51 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 52 | pass 53 | # block until all terminated 54 | try: 55 | psutil.wait_procs(children) 56 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 57 | pass 58 | 59 | def run_bench(args): 60 | # check performcne interference 61 | check_interference(args) 62 | # prep for gracefil termination 63 | ignore_term_signals() 64 | 65 | # prep for logging 66 | subprocess.Popen("mkdir -p " + args.outdir, 67 | shell=True, stdout=None, stderr=None).wait() 68 | # prep command lines 69 | sh_cmds = [] 70 | # 1) background task 71 | if args.bg != None: 72 | sh_cmds.append(args.bg) 73 | # 2) performance monitor 74 | if args.procmon: 75 | global cur_dir 76 | cmd = os.path.join(cur_dir, "procmon") 77 | arg = " -o %s -l %s -a" % (args.outdir, args.log) 78 | sh_cmds.append(cmd + arg) 79 | # 3) benchmark itself 80 | log_fil = os.path.join(args.outdir, args.log + ".schbench_out") 81 | log_out = " 2>&1 | tee " + log_fil 82 | sh_cmds.append(args.fg + log_out) 83 | 84 | # run commands 85 | for sh_cmd in sh_cmds: 86 | print_log(sh_cmd) 87 | p = subprocess.Popen(sh_cmd, shell=True) 88 | # prepend a task to the list 89 | running_tasks.insert(0, p) 90 | 91 | def wait_for_tasks(args): 92 | # wait for the running benchmarks 93 | p = running_tasks[0] 94 | p.wait() 95 | 96 | # kill all 
others 97 | for p in running_tasks[1:]: 98 | kill_all(p) 99 | 100 | def ps_exist(pname): 101 | for p in psutil.process_iter(): 102 | try: 103 | pinfo = p.as_dict(attrs=['pid', 'name']) 104 | if (pinfo['name'] == pname): 105 | return True 106 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 107 | pass 108 | return False 109 | 110 | def check_interference(args): 111 | ret = False 112 | 113 | # check option 114 | if args.procmon: 115 | print_warning("-p will affect the benchmarking accuracy.") 116 | ret = True 117 | 118 | # heavy steam os tasks 119 | heavy_bg_tasks = ["steam", "mangoapp", "gamemoded", 120 | "gamescope", "steamwebhelper"] 121 | for t in heavy_bg_tasks: 122 | if ps_exist(t): 123 | print_warning("A heavy background task, %s, is detected. " \ 124 | "It will interfere the accuracy of benchmark." % t) 125 | ret = True 126 | return ret 127 | 128 | 129 | def expand_args_config(args): 130 | # sanity check 131 | if args.fg != None or args.bg != None: 132 | parser.print_help() 133 | print("bench: error: `-c` and `-f/-b` are disjoint." 
\ 134 | "Only one should be specified.", file = sys.stderr) 135 | exit(1) 136 | 137 | global cur_dir 138 | if args.config == "schbench50": 139 | args.bg = None 140 | args.fg = os.path.join(cur_dir, "schbench") + " -F128 -n10 -r%d -A50" % (args.runtime) 141 | elif args.config == "schbench100": 142 | args.bg = None 143 | args.fg = os.path.join(cur_dir, "schbench") + " -F128 -n10 -r%d" % (args.runtime) 144 | elif args.config == "schbench200": 145 | args.bg = os.path.join(cur_dir, "schbench") + " -F256 -n10 -r%d 2>&1 > /dev/null" % (args.runtime * 2) 146 | args.fg = os.path.join(cur_dir, "schbench") + " -F128 -n10 -r%d" % (args.runtime) 147 | else: 148 | parser.print_help() 149 | print("bench: error: unknown configuration: %s" % args.config, 150 | file = sys.stderr) 151 | exit(1) 152 | 153 | def get_cmd_options(argv): 154 | parser = argparse.ArgumentParser( 155 | prog = "mbench", 156 | description = "Run a micro-benchmark with a pre-configured setting", 157 | epilog = color.WARNING + 158 | """ 159 | Performance monitoring (-p) WILL interfere the results of micro-benchmark. Do NOT use -p when you collect performance results. Instead, run the same benchmark twice: one without profiling for performance comparison and another with profiling for analysis. Also, make sure there is no heavy background task running. 
160 | """ + color.ENDC) 161 | parser.add_argument('-o', '--outdir', action='store', required=True, 162 | help='output directory') 163 | parser.add_argument('-l', '--log', action='store', required=True, 164 | help='log file prefix') 165 | parser.add_argument('-b', '--bg', action='store', 166 | help='command line of a background task') 167 | parser.add_argument('-f', '--fg', action='store', 168 | help='command line of a foreground task for benchmarking') 169 | parser.add_argument('-c', '--config', action='store', 170 | help='run a benchmark with preconfigured setting: `schbench50`, `schbench100`, and `schbench200`, each of which runs `schbench` with 50%%, 100%%, and 200%% CPU utilization, respectively') 171 | parser.add_argument('-r', '--runtime', action='store', type=int, default=180, 172 | help='benchmark running time in seconds (default = 180sec)') 173 | parser.add_argument('-p', '--procmon', action='store_true', 174 | help='run with profiling on') 175 | args = parser.parse_args(argv) 176 | 177 | # expand args.config 178 | if args.config != None: 179 | expand_args_config(args) 180 | 181 | # sanity check 182 | if args.fg == None: 183 | parser.print_help() 184 | print("bench: error: either `-f` or `-c` should be specified.", 185 | file = sys.stderr) 186 | exit(1) 187 | 188 | return args 189 | 190 | if __name__ == "__main__": 191 | args = get_cmd_options(sys.argv[1:]) 192 | run_bench(args) 193 | wait_for_tasks(args) 194 | 195 | -------------------------------------------------------------------------------- /bin/energyinsight: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | def get_log_name(args): 11 | log = os.path.join(args.outdir, args.log + "-energyprof__.log") 12 | return log 13 | 14 | def get_csv_name(args): 15 | log = os.path.join(args.outdir, args.log + 
"-energyinsight__.csv") 16 | return log 17 | 18 | def get_nr_cpus(s): 19 | nr = 0 20 | toks = s.split(',') 21 | for tok in toks: 22 | cpus = tok.split('-') 23 | if len(cpus) == 2: 24 | nr += int(cpus[1]) - int(cpus[0]) + 1 25 | else: 26 | nr += 1 27 | return str(nr) 28 | 29 | def parse_config(f): 30 | ''' 31 | ## cpu=0, load=1, util=16 32 | ''' 33 | (cpu, nr_cpu, load, util, per_cpu_util) = (None, None, None, None, None) 34 | pos = 0 35 | while True: 36 | pos = f.tell() 37 | line = f.readline() 38 | if line == "": 39 | return (None, None, None, None, None) 40 | toks = line.split() 41 | if len(toks) == 0: 42 | continue 43 | if toks[0] != "##": 44 | continue 45 | cpu = toks[1].split("=")[1][:-1] 46 | nr_cpus = get_nr_cpus(cpu) 47 | load = toks[2].split("=")[1][:-1] 48 | util = toks[3].split("=")[1] 49 | per_cpu_util = str(float(util) / int(nr_cpus)) 50 | break 51 | f.seek(pos) 52 | return (cpu, nr_cpus, load, util, per_cpu_util) 53 | 54 | def get_data_in_2_toks(key, index_toks, data_toks): 55 | idx = next((i for i, x in enumerate(index_toks) if x == key), None) 56 | if idx == None: 57 | return None 58 | return data_toks[idx] 59 | 60 | def do_parse_turbostat(f): 61 | ''' 62 | Avg_MHz Busy% Bzy_MHz TSC_MHz IPC IRQ POLL C1 C2 C3 POLL% C1% C2% C3% CorWatt PkgWatt 63 | 506 18.83 2688 3294 1.52 1156 4 15 51 682 0.00 0.26 0.49 80.94 0.51 2.24 64 | ''' 65 | (avg_mhz, bzy_mhz, ipc, corwatt, pkgwatt) = (None, None, None, None, None) 66 | pos = 0 67 | while True: 68 | # index line 69 | pos = f.tell() 70 | line = f.readline() 71 | index_toks= line.split() 72 | if len(index_toks) == 0: 73 | continue 74 | if index_toks[0] != "Avg_MHz": 75 | if index_toks[0] == "stress-ng:" and index_toks[1] == "metrc:": 76 | break 77 | continue 78 | 79 | # data line 80 | pos = f.tell() 81 | line = f.readline() 82 | data_toks = line.split() 83 | if len(data_toks) == 0: 84 | break 85 | if not data_toks[0][0].isnumeric(): 86 | break 87 | 88 | avg_mhz = get_data_in_2_toks("Avg_MHz", index_toks, 
data_toks) 89 | bzy_mhz = get_data_in_2_toks("Bzy_MHz", index_toks, data_toks) 90 | ipc = get_data_in_2_toks("IPC", index_toks, data_toks) 91 | corwatt = get_data_in_2_toks("CorWatt", index_toks, data_toks) 92 | pkgwatt = get_data_in_2_toks("PkgWatt", index_toks, data_toks) 93 | 94 | break 95 | f.seek(pos) 96 | return (avg_mhz, bzy_mhz, ipc, corwatt, pkgwatt) 97 | 98 | def parse_turbostat(f): 99 | (avg_mhz, bzy_mhz, ipc, corwatt, pkgwatt) = (0.0, 0.0, 0.0, 0.0, 0.0) 100 | x = 0 101 | # calculate average of turbostat results for one configuration 102 | while True: 103 | (a, b, i, c, p) = do_parse_turbostat(f) 104 | if a == None: 105 | break 106 | avg_mhz += float(a) 107 | bzy_mhz += float(b) 108 | ipc += float(i) 109 | corwatt += float(c) 110 | pkgwatt += float(p) 111 | x += 1 112 | return (str(avg_mhz/x), str(bzy_mhz/x), str(ipc/x), str(corwatt/x), str(pkgwatt/x)) 113 | 114 | def parse_stress_ng(f): 115 | ''' 116 | stress-ng: metrc: [3044] stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s CPU used per RSS Max 117 | stress-ng: metrc: [3044] (secs) (secs) (secs) (real time) (usr+sys time) instance (%) (KB) 118 | stress-ng: metrc: [3044] cpu 23488 120.00 19.20 0.36 195.73 1200.76 1.02 7408 119 | ''' 120 | bogo_ops = None 121 | pos = 0 122 | while True: 123 | # index line 124 | pos = f.tell() 125 | line = f.readline() 126 | index_toks= line.split() 127 | if len(index_toks) == 0: 128 | continue 129 | if index_toks[0] != "stress-ng:" or index_toks[1] != "metrc:": 130 | continue 131 | line = f.readline() 132 | 133 | # data line 134 | pos = f.tell() 135 | line = f.readline() 136 | data_toks = line.split() 137 | if len(data_toks) != 12: 138 | break 139 | if not data_toks[8][0].isnumeric(): 140 | break 141 | bogo_ops = data_toks[8] 142 | break 143 | f.seek(pos) 144 | return (bogo_ops, ) 145 | 146 | def parse_perf_power(f): 147 | ''' 148 | Performance counter stats for 'system wide': 149 | S0 1 285.74 Joules power/energy-pkg/ 150 | ''' 151 | joules = None 
152 | pos = 0 153 | while True: 154 | # first line 155 | pos = f.tell() 156 | line = f.readline() 157 | toks= line.split() 158 | if len(toks) == 0: 159 | continue 160 | if toks[0] != "Performance" or toks[1] != "counter" or toks[2] != "stats": 161 | continue 162 | line = f.readline() 163 | 164 | # second line 165 | pos = f.tell() 166 | line = f.readline() 167 | toks = line.split() 168 | if len(toks) != 5: 169 | break 170 | if not toks[2][0].isnumeric(): 171 | break 172 | joules = toks[2].replace(",", "") 173 | line = f.readline() 174 | 175 | # third line 176 | line = f.readline() 177 | break 178 | f.seek(pos) 179 | return (joules, ) 180 | 181 | def gen_data_in_csv(args, data): 182 | csv = get_csv_name(args) 183 | with open(csv, "w") as f: 184 | for row in data: 185 | row_str = "| ".join(row) 186 | print("| " + row_str + " |", file = f) 187 | 188 | def report_energyinsight_in_csv(args): 189 | log = get_log_name(args) 190 | data = [("cpu", "nr_cpus", "load", "util", "per_cpu_util", 191 | "avg_mhz", "bzy_mhz", "ipc", "corwatt", "pkgwatt", 192 | "bogo_ops", 193 | "joules", 194 | "ops/joule")] 195 | 196 | with open(log, 'r') as f: 197 | while True: 198 | c = parse_config(f) 199 | if c[0]== None: 200 | break 201 | 202 | t = parse_turbostat(f) 203 | if t[0] == None: 204 | break 205 | 206 | s = parse_stress_ng(f) 207 | if s[0] == None: 208 | break 209 | 210 | j = parse_perf_power(f) 211 | if j[0] == None: 212 | break 213 | 214 | o = (str(float(s[0]) / float(j[0])), ) 215 | 216 | row = c + t + s + j + o 217 | data.append(row) 218 | 219 | gen_data_in_csv(args, data) 220 | # TODO 221 | # - generate bar graphs per CPU utilization 222 | # - x-axis: per-cpu utilization 223 | # - x-tics: cpu config 224 | # - y1-axis: ops/joule 225 | # - y2-axis: ops 226 | 227 | def get_cmd_options(argv): 228 | 229 | parser = argparse.ArgumentParser( 230 | prog = "energyinsight", 231 | description = "Report energy usage per CPU load and number of online CPUs",) 232 | parser.add_argument('-o', 
'--outdir', action='store', required=True, 233 | help='output directory') 234 | parser.add_argument('-l', '--log', action='store', required=True, 235 | help='output log file prefix') 236 | parser.add_argument('-q', '--quiet', action='store_true', 237 | help='do not print result to stdout' ) 238 | args = parser.parse_args(argv) 239 | return args 240 | 241 | if __name__ == "__main__": 242 | args = get_cmd_options(sys.argv[1:]) 243 | report_energyinsight_in_csv(args) 244 | 245 | 246 | 247 | -------------------------------------------------------------------------------- /bin/ginsight: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import argparse 5 | import csv 6 | import matplotlib.pyplot as plt 7 | import zlib 8 | 9 | class time_serise: 10 | time_serise = [] 11 | cdf = [] 12 | num = 0 13 | 14 | def __init__(self, ts): 15 | self.time_serise = ts 16 | self.cdf = ts.copy() 17 | self.cdf.sort() 18 | self.num = len(ts) 19 | 20 | def get_min(self): 21 | return self.get_percentile(0.0) 22 | 23 | def get_nmax(self): 24 | return self.get_percentile(100.0) 25 | 26 | def get_median(self): 27 | return self.get_percentile(50.0) 28 | 29 | def get_percentile(self, p): 30 | # p = [0:100] 31 | i = round( float(self.num - 1) * (p / 100.0) ) 32 | return self.cdf[i] 33 | 34 | def get_average(self): 35 | return sum(self.cdf) / self.num 36 | 37 | def load_mango_csv(csv_name): 38 | def get_sys_info(csv_rd): 39 | sys_info = {} 40 | keys = rd.__next__() 41 | values = rd.__next__() 42 | for (k, v) in zip(keys, values): 43 | sys_info[k] = v 44 | return sys_info 45 | 46 | with open(csv_name, 'r') as f: 47 | rd = csv.reader(f) 48 | try: 49 | # parse system information 50 | sys_info = get_sys_info(rd) 51 | # transpose a row-oriented format to column-oriented format 52 | cols = list( map(lambda c: [c], rd.__next__()) ) 53 | for row in rd: 54 | for (i, elm) in enumerate(row): 55 | cols[i].append( 
float(elm) ) 56 | except csv.Error as e: 57 | sys.exit('Invalid CSV data at {}@{}: {}'.format( \ 58 | csv_name, rd.line_num, e)) 59 | perf_data = {} 60 | for cl in cols: 61 | k, v = cl[0], time_serise( cl[1:] ) 62 | perf_data[k] = v 63 | return (sys_info, perf_data) 64 | 65 | def get_log_name(args, fea, out): 66 | log = os.path.join(args.outdir, 67 | args.prefix + "-ginsight-" + fea + "." + out) 68 | return log 69 | 70 | def reset_plot(): 71 | plt.clf() 72 | plt.style.use('default') 73 | plt.rcParams['font.size'] = 7 74 | 75 | def hash_rgb_from_str(s): 76 | h = zlib.crc32( bytes(s, 'utf-8') ) 77 | g = (h & 0xFF0000) >> 16 78 | b = (h & 0x0000FF) >> 0 79 | r = (h & 0x00FF00) >> 8 80 | return (float(r)/0xFF, float(g)/0xFF, float(b)/0xFF) 81 | 82 | def gen_dist_fig(args, fea, ts, pss, title, x_label, y_label, min_max): 83 | # prepare canvas 84 | reset_plot() 85 | fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(3.5, 10)) 86 | 87 | # 1) violin plot 88 | # - convert pss to quantile for violin plot 89 | quan = [] 90 | for ps in pss: 91 | p, k = ps 92 | quan.append( p / 100.0 ) 93 | # - plot 94 | ax = axs[0] 95 | violin = ax.violinplot([ts.time_serise], showmeans=True, quantiles=[ quan ]) 96 | violin['bodies'][0].set_facecolor( hash_rgb_from_str(y_label) ) 97 | violin['cmeans'].set_edgecolor('red') 98 | # - set title 99 | ax.set_title(title) 100 | # - decoration 101 | ax.set_ylim(bottom = min_max[0], top = min_max[1]) 102 | ax.set_xlabel(x_label) 103 | ax.set_ylabel(y_label) 104 | 105 | # 2) cdf plot 106 | # - plot 107 | ax = axs[1] 108 | ax.plot(range(ts.num), ts.cdf, label='CDF', 109 | linewidth=1, color='black', marker='*') 110 | # - plot stats 111 | y = ts.get_average() 112 | ax.plot([0, ts.num], [y, y], label="Average: %.1f" % y) 113 | for p, l in pss: 114 | y = ts.get_percentile(p) 115 | ax.plot([0, ts.num], [y, y], label=l + ": %.1f" % y) 116 | # - decoration 117 | ax.set_ylim(bottom = min_max[0], top = min_max[1]) 118 | ax.set_xlabel(x_label) 119 | 
ax.set_ylabel(y_label) 120 | ax.legend() 121 | 122 | # 3) time serise plot 123 | ax = axs[2] 124 | ax.plot(range(ts.num), ts.time_serise, 125 | label='FPS', linewidth=1, color='black') 126 | # - decoration 127 | ax.set_ylim(bottom = min_max[0], top = min_max[1]) 128 | ax.set_xlabel(x_label) 129 | ax.set_ylabel(y_label) 130 | 131 | # save to the file 132 | fig_name = get_log_name(args, fea, "svg") 133 | plt.savefig(fig_name) 134 | plt.close() 135 | 136 | def gen_ts_fig(args, fea, ts, pss, title, x_label, y_label, min_max): 137 | # clear canvas 138 | reset_plot() 139 | plt.figure(figsize=(3.5,3)) 140 | 141 | # plot FPS overtime 142 | plt.plot(range(ts.num), ts.time_serise, label='FPS', linewidth=1, color='black') 143 | 144 | # decoration 145 | plt.ylim(bottom = min_max[0], top = min_max[1]) 146 | plt.xlabel(x_label) 147 | plt.ylabel(y_label) 148 | plt.title(title) 149 | 150 | # save to the file 151 | fig_name = get_log_name(args, fea + "-ts", "svg") 152 | plt.savefig(fig_name) 153 | plt.close() 154 | 155 | def gen_violin_fig(args, fea, ts, pss, title, x_label, y_label, min_max): 156 | # clear canvas 157 | reset_plot() 158 | plt.figure(figsize=(3.5,3)) 159 | 160 | # pss to quantile 161 | quan = [] 162 | for ps in pss: 163 | p, k = ps 164 | quan.append( p / 100.0 ) 165 | 166 | # plot 167 | violin = plt.violinplot([ts], showmeans=True, quantiles=[ quan ]) 168 | violin['bodies'][0].set_facecolor( hash_rgb_from_str(y_label) ) 169 | violin['cmeans'].set_edgecolor('red') 170 | 171 | # decoration 172 | plt.ylim(bottom = min_max[0], top = min_max[1]) 173 | plt.xlabel(x_label) 174 | plt.ylabel(y_label) 175 | plt.title(title) 176 | 177 | # save to the file 178 | fig_name = get_log_name(args, fea + "-violin", "svg") 179 | plt.savefig(fig_name) 180 | plt.close() 181 | 182 | def gen_cdf_fig(args, fea, ts, pss, title, x_label, y_label, min_max): 183 | # clear canvas 184 | reset_plot() 185 | plt.figure(figsize=(3.5,3)) 186 | 187 | # plot cdf 188 | plt.plot(range(ts.num), ts.cdf, 
label='CDF', 189 | linewidth=1, color='black', marker='*') 190 | # plot stats 191 | y = ts.get_average() 192 | plt.plot([0, ts.num], [y, y], label="Average: %.1f" % y) 193 | for p, l in pss: 194 | y = ts.get_percentile(p) 195 | plt.plot([0, ts.num], [y, y], label=l + ": %.1f" % y) 196 | 197 | # decoration 198 | plt.ylim(bottom = min_max[0], top = min_max[1]) 199 | plt.xlabel(x_label) 200 | plt.ylabel(y_label) 201 | plt.title(title) 202 | plt.legend() 203 | 204 | # save to the file 205 | fig_name = get_log_name(args, fea + "-cdf", "svg") 206 | plt.savefig(fig_name) 207 | plt.close() 208 | 209 | def gen_cdf_csv(args, ts, pss, y_label, f): 210 | # header 211 | print("%s, %s" % ("{0:^20}".format("Stat"), "{0:^20}".format(y_label)), 212 | file = f ) 213 | 214 | # stat 215 | y = ts.get_average() 216 | print("%s, %s" % ("{0:<20}".format("Average"), "{0:>20}".format("%.4f" % y)), 217 | file = f) 218 | for p, l in pss: 219 | y = ts.get_percentile(p) 220 | print("%s, %s" % ("{0:<20}".format(l), "{0:>20}".format("%.4f" % y)), 221 | file = f) 222 | low1 = ts.get_percentile(1.0) 223 | med = ts.get_percentile(50.0) 224 | y = low1 / med 225 | print("%s, %s" % ("{0:<20}".format("Low1-Med ratio"), 226 | "{0:>20}".format("%.4f" % y)), 227 | file = f) 228 | 229 | def report_stat(args, sys_info, perf_data): 230 | class report_conf: 231 | key = "" 232 | pss = [] 233 | title = "" 234 | x_label = "" 235 | y_label = "" 236 | min_max = (0.0, 0.0) 237 | def __init__(self, k, p, t, x, y, mX): 238 | self.key = k 239 | self.pss = p 240 | self.title = t 241 | self.x_label = x 242 | self.y_label = y 243 | self.min_max = mX 244 | 245 | # FIXME: ad-hoc code 246 | y_fps= 1.0 247 | if args.prefix == "troy-low-battle-benchmark": 248 | y_fps= 2.0 249 | title = args.prefix 250 | confs = [report_conf("fps", 251 | [(50.0, "Median"), (0.0, "Min"), (100.0, "Max"), 252 | (0.1, "Low 0.1%"), (1.0, "Low 1%"), (97.0, "Low 97%"),], 253 | title, "frames", "FPS", (0.0, 120.0 * y_fps)), 254 | 
report_conf("frametime", 255 | [(50.0, "Median"), (0.0, "Min"), (100.0, "Max"), 256 | (99.0, "High 1%"), (99.9, "High 0.1%"), ], 257 | title, "frames", "frametime (usec)", (0.0, 200000.0)), 258 | report_conf("cpu_load", 259 | [(50.0, "Median"), (0.0, "Min"), (100.0, "Max")], 260 | title, "", "cpu load (%)", (0.0, 100.0)), 261 | report_conf("gpu_load", 262 | [(50.0, "Median"), (0.0, "Min"), (100.0, "Max")], 263 | title, "", "gpu load (%)", (0.0, 100.0)), 264 | report_conf("ram_used", 265 | [(50.0, "Median"), (0.0, "Min"), (100.0, "Max")], 266 | title, "", "ram used (GB)", (0.0, 16.0)),] 267 | 268 | for c in confs: 269 | ts = perf_data[c.key] 270 | # generate cdf stat 271 | log = get_log_name(args, c.key, "csv") 272 | with open(log, 'w') as f: 273 | gen_cdf_csv(args, ts, c.pss, c.y_label, f) 274 | if args.quiet == False: 275 | gen_cdf_csv(args, ts, c.pss, c.y_label, sys.stdout) 276 | # generate distribution graphs 277 | gen_violin_fig(args, c.key, ts.time_serise, c.pss, c.title, c.x_label, c.y_label, c.min_max) 278 | gen_cdf_fig(args, c.key, ts, c.pss, c.title, c.x_label, c.y_label, c.min_max) 279 | gen_ts_fig(args, c.key, ts, c.pss, c.title, c.x_label, c.y_label, c.min_max) 280 | gen_dist_fig(args, c.key, ts, c.pss, c.title, c.x_label, c.y_label, c.min_max) 281 | 282 | def get_cmd_options(argv): 283 | parser = argparse.ArgumentParser( 284 | prog = "ginsight", 285 | description = "Generarte a report from MangoHud log") 286 | parser.add_argument('-l', '--log', action='store', required=True, 287 | help='MangoHud log file in a CSV format') 288 | parser.add_argument('-o', '--outdir', action='store', required=True, 289 | help='output directory') 290 | parser.add_argument('-p', '--prefix', action='store', required=True, 291 | help='output file prefix') 292 | parser.add_argument('-q', '--quiet', action='store_true', 293 | help='do not print result to stdout' ) 294 | args = parser.parse_args(argv) 295 | return args 296 | 297 | if __name__ == "__main__": 298 | args = 
get_cmd_options(sys.argv[1:]) 299 | sys_info, perf_data = load_mango_csv(args.log) 300 | report_stat(args, sys_info, perf_data) 301 | 302 | 303 | 304 | -------------------------------------------------------------------------------- /bin/procinsight: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | MAX_NCPUS = 4096 11 | 12 | def get_log_name(args, mon): 13 | log = os.path.join(args.outdir, 14 | args.log + "-procmon-" + mon + "__.log") 15 | return log 16 | 17 | def get_csv_name(args, mon, scope): 18 | csv = os.path.join(args.outdir, 19 | args.log + "-procinsight-" + mon + "-" + scope + ".csv") 20 | return csv 21 | 22 | def transpose_2d_list(ll): 23 | return list(zip(*ll)) 24 | 25 | def report_stat_in_csv(col_names, stat_ll, f): 26 | def get_sep(c, ncol): 27 | if c == (ncol - 1): 28 | return "\n" 29 | else: 30 | return ", " 31 | 32 | def is_float(n): 33 | return int(n) != n 34 | 35 | # print column header 36 | ncol = len(stat_ll[0]) + 1 37 | col_names = col_names[0:ncol] 38 | for c, col in enumerate(col_names): 39 | print("{0:^20}".format(col), end=get_sep(c, ncol), file = f) 40 | 41 | # print tuple by tuple 42 | ncol = len(stat_ll[0]) 43 | nrow = len(stat_ll) 44 | for r in range(nrow): 45 | key, _= stat_ll[r][0] 46 | print("{0:<20}".format(key), end=", ", file = f) 47 | for c in range(ncol): 48 | _, val= stat_ll[r][c] 49 | if val is None: 50 | print("{0:>20}".format("%s" % "N/A"), end=get_sep(c, ncol), file = f) 51 | elif is_float(val): 52 | print("{0:>20}".format("%.4f" % val), end=get_sep(c, ncol), file = f) 53 | else: 54 | print("{0:>20}".format(int(val)), end=get_sep(c, ncol), file = f) 55 | 56 | def report_procmon_sched_in_csv(args): 57 | # get stat 58 | (sched_sw_stat, sched_core_stat) = procmon_stat_sched(args) 59 | sched_sw_stat = [sched_sw_stat] 60 | col_names = 
["scope", "system"] 61 | 62 | # generate csv for system-wide state 63 | csv_name = get_csv_name(args, "sched", "sw") 64 | with open(csv_name, "w") as f: 65 | report_stat_in_csv(col_names, sched_sw_stat, f) 66 | 67 | if args.quiet == False: 68 | print("## Sched_wakeup count", file = sys.stdout) 69 | report_stat_in_csv(col_names, sched_sw_stat, sys.stdout) 70 | print("\n") 71 | 72 | # generate csv for per-core state 73 | csv_name = get_csv_name(args, "sched", "core") 74 | with open(csv_name, "w") as f: 75 | report_stat_in_csv(col_names, sched_core_stat, f) 76 | 77 | def report_procmon_cstate_in_csv(args): 78 | # get stat 79 | (cstate_sw_stat, cstate_core_stat) = procmon_stat_cstate(args) 80 | cstate_sw_stat = transpose_2d_list( [cstate_sw_stat] ) 81 | cstate_core_stat = transpose_2d_list(cstate_core_stat) 82 | 83 | # generate csv for system-wide state 84 | col_names = ["info", "system"] 85 | csv_name = get_csv_name(args, "cstate", "sw") 86 | with open(csv_name, "w") as f: 87 | report_stat_in_csv(col_names, cstate_sw_stat, f) 88 | 89 | if args.quiet == False: 90 | print("## Cstate states", file = sys.stdout) 91 | report_stat_in_csv(col_names, cstate_sw_stat, sys.stdout) 92 | print("\n") 93 | 94 | # generate csv for per-core state 95 | col_names = ["info"] + \ 96 | list( map(lambda c: "CPU" + str(c), range(MAX_NCPUS)) ) 97 | csv_name = get_csv_name(args, "cstate", "core") 98 | with open(csv_name, "w") as f: 99 | report_stat_in_csv(col_names, cstate_core_stat, f) 100 | 101 | def report_procmon_energy_in_csv(args): 102 | # get stat 103 | energy_sw_stat = transpose_2d_list( [procmon_stat_energy(args)] ) 104 | 105 | # generate csv for system-wide state 106 | col_names = ["energy", "system"] 107 | csv_name = get_csv_name(args, "energy", "sw") 108 | with open(csv_name, "w") as f: 109 | report_stat_in_csv(col_names, energy_sw_stat, f) 110 | 111 | if args.quiet == False: 112 | print("## Energy consumption", file = sys.stdout) 113 | report_stat_in_csv(col_names, energy_sw_stat, 
sys.stdout) 114 | print("\n") 115 | 116 | def report_procmon_perf_in_csv(args): 117 | # get stat 118 | perf_sw_stat = transpose_2d_list( [procmon_stat_perf(args)] ) 119 | 120 | # generate csv for system-wide state 121 | col_names = ["info", "system"] 122 | csv_name = get_csv_name(args, "perf", "sw") 123 | with open(csv_name, "w") as f: 124 | report_stat_in_csv(col_names, perf_sw_stat, f) 125 | 126 | if args.quiet == False: 127 | print("## Performance counters", file = sys.stdout) 128 | report_stat_in_csv(col_names, perf_sw_stat, sys.stdout) 129 | print("\n") 130 | 131 | def report_procmons_in_csv(args): 132 | # -s, --sched 133 | report_procmon_sched_in_csv(args) 134 | # -c, --cstate 135 | report_procmon_cstate_in_csv(args) 136 | # -e, --energy 137 | report_procmon_energy_in_csv(args) 138 | # -p, --perf 139 | report_procmon_perf_in_csv(args) 140 | 141 | def str_to_nstr(s): 142 | ns = map(lambda c: c if c != ',' and c != '%' else '', s) 143 | return "".join( list(ns) ) 144 | 145 | def procmon_stat_sched(args): 146 | sw_stat, core_stat = [0], [0] 147 | log = get_log_name(args, "sched") 148 | 149 | with open(log, 'r') as f: 150 | def get_kv(line): 151 | last_wd = line.split()[-1] 152 | toks = line.split()[-1].split('=') 153 | if len(toks) != 2: 154 | return ("", "") 155 | k, v = line.split()[-1].split('=') 156 | return (k, v) 157 | 158 | # read the first line, "cpus=16" and do sanity check 159 | # then initialize the per-core stat 160 | while True: 161 | k, v = get_kv(f.readline()) 162 | if k == "cpus": 163 | ncpus = int(v) 164 | break 165 | core_stat = [0] * ncpus 166 | 167 | # read the rest lines, "... 
target_cpu=006" 168 | for line in f: 169 | k, v = get_kv(line) 170 | if k != "target_cpu": 171 | continue 172 | cpu_id = int(v) 173 | core_stat[cpu_id] = core_stat[cpu_id] + 1 174 | 175 | # update the system-wide stat 176 | sw_stat[0] = sum(core_stat) 177 | 178 | # convert to a list of tuples 179 | sw_stat2 = list( map(lambda v: ("sched_wakeup", v), sw_stat) ) 180 | core_stat2= list( map(lambda kv: [("CPU" + str(kv[0]), kv[1])], \ 181 | list( zip(range(len(core_stat)), core_stat) )) ) 182 | return sw_stat2, core_stat2 183 | 184 | def procmon_stat_cstate(args): 185 | sw_stat, core_stat = [], [[]] * MAX_NCPUS 186 | log = get_log_name(args, "cstate") 187 | 188 | with open(log, 'r') as f: 189 | def tokenize(line): 190 | return line.replace('|', ' ').split() 191 | 192 | # get column names 193 | col_names = [] 194 | for line in f: 195 | tokens = tokenize(line) 196 | if tokens[0] == "CPU": 197 | for token in tokens: 198 | col_names.append(token) 199 | break 200 | 201 | # parse per-core stat 202 | for line in f: 203 | # read stat 204 | stat_dict = {} 205 | for (col_name, token) in zip(col_names, tokenize(line)): 206 | stat_dict[col_name] = token 207 | # rearragne stat 208 | def get_kv(dict, k): 209 | return (k, dict[k]) 210 | col_order = ('C0', 'POLL', 'C1', 'C2', 'C3', 'Freq') 211 | stat_list = [] 212 | for col in col_order: 213 | stat_list.append( (col, float(stat_dict[col])) ) 214 | stat_dict['CPU'] = int(stat_dict['CPU']) 215 | cpu_id = stat_dict['CPU'] 216 | core_stat[cpu_id] = stat_list 217 | ncpu = cpu_id + 1 218 | core_stat = core_stat[0:ncpu] 219 | 220 | # aggregate per-core state to system-wide stat 221 | sw_stat = core_stat[0].copy() 222 | for cs in core_stat[1:]: 223 | for pos, c in enumerate(cs): 224 | sk, sv = sw_stat[pos] 225 | ck, cv = c 226 | sw_stat[pos] = (sk, sv + cv) 227 | for pos, s in enumerate(sw_stat): 228 | sk, sv = sw_stat[pos] 229 | sw_stat[pos] = (sk, sv / float(ncpu)) 230 | 231 | return sw_stat, core_stat 232 | 233 | def parse_log_ef(log, 
parse_tbl): 234 | # parse the log 235 | stat = {} 236 | with open(log, 'r') as f: 237 | for line in f: 238 | for key, (pos, name) in parse_tbl: 239 | if line.find(key) >= 0: 240 | tokens = line.split() 241 | stat[name] = float(str_to_nstr(tokens[pos])) 242 | 243 | # sort statistics for easier interpretation 244 | stat2 = [] 245 | for key, (pos, name) in parse_tbl: 246 | stat2.append( (name, stat.get(name)) ) 247 | return stat2 248 | 249 | def procmon_stat_energy(args): 250 | parse_tbl = ( ("Joules", (2, "J")), 251 | ("seconds", (0, "__seconds")) ) 252 | 253 | log = get_log_name(args, "energy") 254 | stat = parse_log_ef(log, parse_tbl) 255 | # add energy (J/sec) 256 | sec = float(stat[1][1]) 257 | stat[1] = ("J/sec", stat[0][1]/sec) 258 | return stat 259 | 260 | def procmon_stat_perf(args): 261 | parse_tbl = ( ("seconds time elapsed", (0, "time (sec)")), 262 | ("cycles ", (0, "cycles")), 263 | ("instructions", (0, "instructions")), 264 | ("instructions", (3, "ipc")), 265 | ("stalled-cycles-frontend", (3, "frontend-stall (%)")), 266 | ("stalled-cycles-backend", (3, "backend-stall (%)")), 267 | ("branches", (0, "branches")), 268 | ("branch-misses", (3, "branch-misses (%)")), 269 | ("page-faults", (0, "page-faults")), 270 | ("context-switches", (0, "context-switches")), 271 | ("cpu-migrations", (0, "cpu-migrations")), ) 272 | 273 | log = get_log_name(args, "perf") 274 | stat = parse_log_ef(log, parse_tbl) 275 | return stat 276 | 277 | def get_cmd_options(argv): 278 | parser = argparse.ArgumentParser( 279 | prog = "procinsight", 280 | description = "Report CPU statistics and system-wide scheduling statistics",) 281 | parser.add_argument('-o', '--outdir', action='store', required=True, 282 | help='output directory') 283 | parser.add_argument('-l', '--log', action='store', required=True, 284 | help='log file prefix') 285 | parser.add_argument('-q', '--quiet', action='store_true', 286 | help='do not print result to stdout' ) 287 | args = parser.parse_args(argv) 288 | 
return args 289 | 290 | if __name__ == "__main__": 291 | args = get_cmd_options(sys.argv[1:]) 292 | report_procmons_in_csv(args) 293 | 294 | 295 | 296 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | `vapormark` is a benchmark framework developed for measuring various 2 | performance metrics (e.g., throughput, latency, and tail latency) and the 3 | process states (e.g., backend stall, energy consumption) while running a 4 | program on Linux. It especially targets `SteamOS` -- a Linux-based gaming 5 | device -- but most features are generically useful in regular Linux 6 | environments. 7 | 8 | 9 | Three Phases 10 | ------------ 11 | `vapormark` consists of three phases: 12 | 1. running a benchmark (i.e., collecting performance data) 13 | 2. analyzing the collected data 14 | 3. generating a report. 15 | 16 | Only the first step should run on a target device, such as `SteamDeck`. The 17 | others can be run on almost any Linux box.
18 | 19 | External Dependencies 20 | --------------------- 21 | `vapormark` uses the following in each phase: 22 | - running a benchmark (i.e., collecting performance data) 23 | - [schbench](https://kernel.googlesource.com/pub/scm/linux/kernel/git/mason/schbench/), 24 | [stress-ng](https://github.com/ColinIanKing/stress-ng), and 25 | [gbench](https://github.com/Igalia/vapormark/tree/main/micro-bench/gbench) 26 | for micro-benchmarking of scheduler performance 27 | - [MangoHud](https://github.com/flightlessmango/MangoHud) for measuring 28 | FPS (frame per second) during a running game 29 | - `strace`, `trace-cmd`, `cpupower`, `turbostat`, `chcpu`, `taskset`, and `perf` for collecting processor states 30 | - analyzing the collected data 31 | - `matplotlib`, `graphviz`, and `numpy` python library for generating graphs 32 | - generating a report 33 | - `pandoc` for generating a report in HTML format 34 | - for all phases 35 | - `python3` 36 | 37 | Installation 38 | ------------ 39 | Just clone this repository and hit `make` on the top of the directory. The 40 | build procedure will clone and build `schbench`. All the binaries will be put 41 | under the `vapormark/bin` directory. 42 | 43 | If you want to measure FPS, install `MangoHud`. For `SteamDeck`, please refer to 44 | the following steps: 45 | 1. Press `STEAM` button then choose `Power -> Switch to Desktop` 46 | 2. On Plasma Desktop, launch `Discover Center`. Then find and install `MangoHud` 47 | 3. Copy the MangoHud configuration file, `vapormark/config/MangoHud.conf` to 48 | `/home/deck/.config/MangoHud/MangoHud.conf`. This is the minimal MangoHud 49 | configuration that `vapormark` expects. MangoHud logs will be stored to 50 | the `/home/deck/mangologs-vapormark/` directory.
51 | 52 | Running a benchmark and collecting performance data 53 | --------------------------------------------------- 54 | 55 | #### `scmon`: collecting system usage of a process tree 56 | 57 | `scmon` collects a system call usage of a process tree. It generates per-task 58 | system call trace file under `OUTDIR` with prefix `LOG` and suffix `-scmon.*`. 59 | It imposes noticeable performance overhead so it should not be used when 60 | collecting performance numbers. `scmon` is useful to understand the high-level 61 | behavioral traits of an application. 62 | 63 | ``` 64 | usage: scmon [-h] -o OUTDIR -l LOG [-p PID] [-r ROOT] [-n NAME] [-c CMD [CMD ...]] 65 | 66 | Collect system call usage statistics of a program 67 | 68 | options: 69 | -h, --help show this help message and exit 70 | -o OUTDIR, --outdir OUTDIR 71 | output directory 72 | -l LOG, --log LOG log file prefix 73 | -p PID, --pid PID process id to monitor 74 | -r ROOT, --root ROOT root process id to monitor (all decendents will be monitored) 75 | -n NAME, --name NAME name of a process to monitor 76 | -c CMD [CMD ...], --cmd CMD [CMD ...] 77 | command to execute 78 | 79 | For example, 'scmon -o log -l steam -n steam' to log the system call usage of 'steam' 80 | and all its decendents under log/steam*-scmon*. 81 | ``` 82 | 83 | #### `procmon`: collecting processor and scheduling statistics 84 | 85 | `procmon` collects four types of information: 1) scheduler's wakeup events, 2) 86 | CPU's c-state, 3) CPU's energy consumption, and 4) processor's performance 87 | monitoring data (e.g., instruction per cycle). Similar to `scmon`, it generates 88 | logs under `OUTDIR` with prefix `LOG` and suffix `-procmon.*`. It collects 89 | information while it runs. The runtime overhead is not marginal so it can be 90 | run with an application level benchmark (like game). However, it is not 91 | recommended with a micro-benchmark (`schdbench`), which is much more sensitive 92 | to any noises. 
93 | 94 | ``` 95 | usage: procmon [-h] -o OUTDIR -l LOG [-s] [-c] [-e] [-p] [-a] 96 | 97 | Collect CPU statistics and system-wide scheduling statistics 98 | 99 | options: 100 | -h, --help show this help message and exit 101 | -o OUTDIR, --outdir OUTDIR 102 | output directory 103 | -l LOG, --log LOG log file prefix 104 | -s, --sched trace wake-up events of process scheduler 105 | -c, --cstate trace c-state of all CPUs 106 | -e, --energy trace energy consumption of all CPUs 107 | -p, --perf trace performance statistics of all CPUs 108 | -a, --all trace all statistics 109 | 110 | procmon internally uses 'trace-cmd', 'cpupower', and 'perf'. 111 | ``` 112 | 113 | #### `mbench`: running a micro-benchmark 114 | `mbench` is a wrapper which runs `schbench` with a pre-configured settings. For 115 | convenience, it launches `procmon` if necessary. However, to get accurate 116 | performance results, it is recommended `mbench` with and without `procmon`. 117 | Also, make sure there is no heavy background tasks: for example, in `SteamOS`, 118 | `steam`, `mangoapp`, `gamemoded`, `gamescope`, and `steamwebhelper`. 
119 | 120 | ``` 121 | usage: mbench [-h] -o OUTDIR -l LOG [-b BG] [-f FG] [-c CONFIG] [-r RUNTIME] [-p] 122 | 123 | Run a micro-benchmark with a pre-configured setting 124 | 125 | options: 126 | -h, --help show this help message and exit 127 | -o OUTDIR, --outdir OUTDIR 128 | output directory 129 | -l LOG, --log LOG log file prefix 130 | -b BG, --bg BG command line of a background task 131 | -f FG, --fg FG command line of a foreground task for benchmarking 132 | -c CONFIG, --config CONFIG 133 | run a benchmark with preconfigured setting: `schbench50`, 134 | `schbench100`, and `schbench200`, each of which runs `schbench` with 135 | 50%, 100%, and 200% CPU utilization, respectively 136 | -r RUNTIME, --runtime RUNTIME 137 | benchmark running time in seconds (default = 90sec) 138 | -p, --procmon run with profiling on 139 | 140 | Performance monitoring (-p) WILL interfere the results of micro-benchmark. Do NOT use -p when 141 | you collect performance results. Instead, run the same benchmark twice: one without profiling 142 | for performance comparison and another with profiling for analysis. Also, make sure there is 143 | no heavy background task running. 144 | ``` 145 | 146 | #### `MangoHud`: measuring FPS, CPU/GPU utilization, etc. 147 | Launching, starting, and stopping `MangoHud` is not integrated with 148 | `vapormark`. Hence `vapormark` just follows the standard `MangoHud` usage. 149 | Especially in `SteamDeck`, please refer to the following procedure: 150 | 151 | - For a game to FPS logging, go to `Properties -> General -> Launch Options` 152 | and add `mangohud %command%`. *The game must be launched in **Desktop Mode 153 | (not in Gaming Mode)** to log FPS and other system stats.* 154 | 155 | - Now, you will see the overlay window showing FPS when launching the game. You 156 | can start and stop FPS logging by hitting `Shift_L+F2`. The log will be stored 157 | at `/home/deck/mangologs-vapormark`. Some games hang when MangoHud is enabled. 
158 | Other useful MangoHud shortcuts are as follows: 159 | 160 | ``` 161 | Shift_L+F2 : Toggle Logging 162 | Shift_L+F4 : Reload Config 163 | Shift_R+F12 : Toggle Hud 164 | ``` 165 | 166 | - Once you finish FPS logging by hitting `Shift_L+F2`, `MangoHud` will generate 167 | a `csv` log file under `/home/deck/mangologs-vapormark`. Please copy and 168 | rename it ending with `-mangohud.csv` for analysis and report generation. 169 | 170 | 171 | - Following games provide in-game benchmarks: 172 | 173 | | Game | How to start an in-game benchmark | 174 | | :--------------------- | :-------------------------------- | 175 | | Far Cry: New Dawn | `Options -> Benchmark` | 176 | | A Total War Saga: Troy | `Options -> Graphics -> Advanced -> Benchmark` | 177 | | Cyber Punk 2077 | `Settings -> Graphics -> Quick Preset, Run Benchmark` | 178 | | Factorio | On terminal: `factorio --benchmark` [map.zip](https://factoriobox.1au.us/map/download/91c009e61f44c3c532f7152b0501ea0fc920723148dd1c38c4da129eb9d399f9.zip) `--benchmark-ticks 1000 --disable-audio` | 179 | 180 | 181 | #### `schedmon`: collecting the detailed scheduling activities 182 | `schedmon` collects the detailed system-wide scheduling activities. It 183 | internally relies on `perf sched record` command. To collect the kernel 184 | symbol names correctly, please run the following. 185 | 186 | ``` 187 | $> echo 1 > /proc/sys/kernel/kptr_restrict 188 | ``` 189 | 190 | 191 | ``` 192 | usage: schedmon [-h] -o OUTDIR -l LOG 193 | 194 | Collect the detailed scheduler activity internally using `perf sched` 195 | 196 | options: 197 | -h, --help show this help message and exit 198 | -o OUTDIR, --outdir OUTDIR 199 | output directory 200 | -l LOG, --log LOG log file prefix 201 | ``` 202 | 203 | #### `energyprof`: collecting energy and performance statistics according to CPU load 204 | `energyprof` measures energy consumption and performance while running 205 | `stress-ng` with various CPU loads and various numbers of online CPUs. 
It 206 | internally relies on `turbostat`, `stress-ng`, `chcpu`, `taskset`, and `perf`. 207 | 208 | ``` 209 | usage: energyprof [-h] -c NUM_CPUS [-t TIME_SEC] -o OUTDIR -l LOG 210 | 211 | Collect energy and performance statistics according to CPU load 212 | 213 | options: 214 | -h, --help show this help message and exit 215 | -c NUM_CPUS, --num_cpus NUM_CPUS 216 | number of CPUs of this machine 217 | -t TIME_SEC, --time_sec TIME_SEC 218 | time in sec to run stress-_ng 219 | -o OUTDIR, --outdir OUTDIR 220 | output directory 221 | -l LOG, --log LOG log file prefix 222 | ``` 223 | 224 | Analyzing the collected data 225 | ---------------------------- 226 | Once the performance data is collected, it is time to analyze the results. In 227 | this phase, `vapormark` transforms various log files into the standard CSV 228 | format and produces the latency distribution graphs. Specifically, it provides 229 | the following commands. The generated files have a suffix of its program, 230 | `*-scinsight*`, `*-procinsight*`, `*-ginsight*`, and `*-energyprof*`. 
231 | 232 | #### `scinsight`: analyzing `scmon` logs 233 | ``` 234 | usage: scinsight [-h] -o OUTDIR -l LOG [-q] 235 | 236 | Report system call usage statistics of a program 237 | 238 | options: 239 | -h, --help show this help message and exit 240 | -o OUTDIR, --outdir OUTDIR 241 | output directory 242 | -l LOG, --log LOG log file prefix, or path to directory containing log files 243 | -q, --quiet do not print result to stdout 244 | ``` 245 | 246 | 247 | 248 | #### `procinsight`: analyzing `procmon` logs 249 | ``` 250 | usage: procinsight [-h] -o OUTDIR -l LOG [-q] 251 | 252 | Report CPU statistics and system-wide scheduling statistics 253 | 254 | options: 255 | -h, --help show this help message and exit 256 | -o OUTDIR, --outdir OUTDIR 257 | output directory 258 | -l LOG, --log LOG log file prefix 259 | -q, --quiet do not print result to stdout 260 | ``` 261 | 262 | 263 | 264 | #### `ginsight`: analyzing a `MangoHud` log 265 | ``` 266 | usage: ginsight [-h] -l LOG -o OUTDIR -p PREFIX [-q] 267 | 268 | Generarte a report from MangoHud log 269 | 270 | options: 271 | -h, --help show this help message and exit 272 | -l LOG, --log LOG MangoHud log file in a CSV format 273 | -o OUTDIR, --outdir OUTDIR 274 | output directory 275 | -p PREFIX, --prefix PREFIX 276 | output file prefix 277 | -q, --quiet do not print result to stdout 278 | ``` 279 | 280 | #### `schedinsight`: analyzing the results of `schedmon` 281 | 282 | ``` 283 | usage: schedinsight [-h] -l LOGDIR -p PREFIX -o OUTPUT [-i IMGTYPE] [-k] [-s MINSCHED] [-t TIMELIMIT] 284 | 285 | Report the detailed analysis of scheduliing activities collected by `perf sched record` 286 | 287 | options: 288 | -h, --help show this help message and exit 289 | -l LOGDIR, --logdir LOGDIR 290 | a log directory 291 | -p PREFIX, --prefix PREFIX 292 | log file prefix 293 | -o OUTPUT, --output OUTPUT 294 | a target report file name in markdown format 295 | -i IMGTYPE, --imgtype IMGTYPE 296 | type of image format (png, svg) 297 | -k, 
--pickle use pickle whenever possible 298 | -s MINSCHED, --minsched MINSCHED 299 | set the minimum number of schedules for task analysis 300 | -t TIMELIMIT, --timelimit TIMELIMIT 301 | time limit to draw a graph in seconds 302 | ``` 303 | 304 | #### `energyinsight`: analyzing the results of `energyprof` 305 | 306 | ``` 307 | usage: energyinsight [-h] -o OUTDIR -l LOG [-q] 308 | 309 | Report energy usage per CPU load and number of online CPUs 310 | 311 | options: 312 | -h, --help show this help message and exit 313 | -o OUTDIR, --outdir OUTDIR 314 | output directory 315 | -l LOG, --log LOG output log file prefix 316 | -q, --quiet do not print result to stdout 317 | ``` 318 | 319 | 320 | 321 | Generating a (comparison) report 322 | -------------------------------- 323 | 324 | `vapormark` provides a reporting feature that compares the results of multiple 325 | configurations. This is especially useful when checking the impact of a certain 326 | optimization. When more than one log directories are given (with multiple -l 327 | options), `report` uses the logs in the first directory as a baseline and shows 328 | the relative delta in percent. Note that `report` does not support the 329 | comparison of `schedmon` logs yet. 330 | 331 | ``` 332 | usage: report [-h] -l LOGDIR -p PREFIX -o OUTPUT [-f] [-g] 333 | 334 | Generate a report of given log directories 335 | 336 | options: 337 | -h, --help show this help message and exit 338 | -l LOGDIR, --logdir LOGDIR 339 | a log directory. When mulltiple `-l` options are given, comparison 340 | will be reported using the first one as a baseline. 
341 | -p PREFIX, --prefix PREFIX 342 | log file prefix for report generation 343 | -o OUTPUT, --output OUTPUT 344 | target report file name in markdown format 345 | -f, --force force to regenerate all CSV files 346 | -g, --debug print out debug messages 347 | 348 | For example, `report -l base_dir -l cmp_dir -p game1 -o report.md` compares `game1` logs 349 | in two directories -- `base_dir` and `cmp_dir` -- and generates `report.md`. `base_dir` 350 | is used in calculating the relative difference. When only one log directory is given, 351 | only the summary of results without comparison is provided. It expects certain file 352 | extensions: `*.factorio_out` for factorio benchmark and `*.schbench_out` for schbench 353 | benchmark. 354 | ``` 355 | 356 | Misc tools 357 | ---------- 358 | 359 | #### `sched-config`: save and restore key scheduler parameters from debugfs 360 | 361 | ``` 362 | usage: sched-config [-h] [-g GET] [-s SET] 363 | 364 | Set or get the scheduler config parameters 365 | 366 | options: 367 | -h, --help show this help message and exit 368 | -g GET, --get GET Get the scheculer parameters 369 | -s SET, --set SET Set the scheculer parameters 370 | sched-config: error: either '-g' or '-s' should be specified 371 | ``` 372 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. 
By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. 
If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | JoulesWatch 294 | Copyright (C) 2023 kernel-research 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /micro-bench/gbench/gbench.c: -------------------------------------------------------------------------------- 1 | /* 2 | * gbench.c 3 | * 4 | * Copyright (C) 2023 Igalia 5 | * Changwoo Min 6 | * 7 | * GPLv2, portions copied from schbench (and potentially from kernel and fio) 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | enum { 28 | IPC_FUTEX, 29 | IPC_PIPE_EPOLL, 30 | IPC_SOCK_SELECT, 31 | 32 | IPC_MAX, 33 | }; 34 | 35 | enum { 36 | USEC_PER_SEC = 1000000L, 37 | }; 38 | 39 | static const char *ipc_str[3] = { 40 | "futex", "pipe", "sock", 41 | }; 42 | 43 | struct opt; 44 | 45 | union pipe_fds { 46 | int fds[2]; 47 | struct { 48 | int rfd; 49 | int wfd; 50 | }; 51 | }; 52 | 53 | struct pipe_pair { 54 | union pipe_fds rx; 55 | union pipe_fds tx; 56 | }; 57 | 58 | #define MAX_EPOLL_EVENTS 64 59 | 60 | struct epoll_ipc { 61 | int fd; 62 | int nfds; 63 | struct epoll_event events[MAX_EPOLL_EVENTS]; 64 | }; 65 | 66 | union ipc { 67 | int futex; 68 | struct pipe_pair pipe; 69 | struct epoll_ipc epoll; 70 | }; 71 | 72 | #define MAIN_ID (-1) 73 | 74 | struct task_stat { 75 | __u64 cnt; 76 | __u64 avg_run_time; 77 | __u64 frq_run_time; 78 | __u64 avg_wait_time; 79 | __u64 frq_wait_time; 80 | }; 81 | 82 | struct task_data { 83 | struct opt * opt; 84 | pthread_t tid; 85 | pid_t 
/*
 * wait on a futex, with an optional timeout.  Make sure to set
 * the futex to FUTEX_BLOCKED beforehand.
 *
 * This will return zero if all went well, or return -ETIMEDOUT if you
 * hit the timeout without getting posted.
 */
static int fwait(int *futexp, struct timespec *timeout)
{
	int s;
	while (1) {
		/*
		 * Is the futex available?  A poster (fpost) flips it from
		 * FUTEX_BLOCKED to FUTEX_RUNNING; we claim the wakeup by
		 * flipping it back to FUTEX_BLOCKED for the next round.
		 */
		if (__sync_bool_compare_and_swap(futexp, FUTEX_RUNNING,
						 FUTEX_BLOCKED)) {
			break;	/* Yes */
		}
		/*
		 * Futex is not available; wait.  EAGAIN means the value
		 * already changed under us (a post raced in), so just
		 * retry the CAS above rather than treating it as an error.
		 */
		s = futex(futexp, FUTEX_WAIT_PRIVATE, FUTEX_BLOCKED, timeout, NULL, 0);
		if (s == -1 && errno != EAGAIN) {
			if (errno == ETIMEDOUT)
				return -ETIMEDOUT;
			perror("futex-FUTEX_WAIT");
			exit(1);
		}
	}
	return 0;
}
231 | } 232 | 233 | static __u64 calc_avg_freq(__u64 old_freq, __u64 interval) 234 | { 235 | __u64 new_freq, ewma_freq; 236 | 237 | new_freq = USEC_PER_SEC / interval; 238 | ewma_freq = calc_avg(old_freq, new_freq); 239 | 240 | return ewma_freq; 241 | } 242 | 243 | static void update_stat(struct task_data *t, __u64 wait_int, __u64 run_dur) 244 | { 245 | struct task_stat *s = &t->stat; 246 | 247 | s->avg_run_time = calc_avg(s->avg_run_time, run_dur); 248 | s->frq_run_time = calc_avg_freq(s->frq_run_time, run_dur+wait_int); 249 | 250 | s->avg_wait_time = calc_avg(s->avg_wait_time, wait_int); 251 | s->frq_wait_time = calc_avg_freq(s->frq_wait_time, run_dur+wait_int); 252 | } 253 | 254 | static void do_work(struct task_data *t) 255 | { 256 | __u64 wait_interval, run_duration; 257 | 258 | /* do some computation */ 259 | wait_interval = start_tick(); 260 | do { 261 | do_some_math(t); 262 | } while (t->run_time >= (run_duration = get_cur_tick())); 263 | 264 | /* update statistics */ 265 | update_stat(t, wait_interval, run_duration); 266 | } 267 | 268 | static void worker_create_ipc(struct task_data *w) 269 | { 270 | int ret = 0; 271 | 272 | switch(w->opt->ipc_type) { 273 | case IPC_FUTEX: 274 | /* do nothing */ 275 | break; 276 | case IPC_PIPE_EPOLL: 277 | /* create a pair of pipe -- rx and tx */ 278 | ret = pipe(w->ipc.pipe.rx.fds); 279 | if (ret == -1) { 280 | perror("failed to create an rx pipe\n"); 281 | exit(1); 282 | } 283 | ret = pipe(w->ipc.pipe.tx.fds); 284 | if (ret == -1) { 285 | perror("failed to create a tx pipe\n"); 286 | exit(1); 287 | } 288 | break; 289 | case IPC_SOCK_SELECT: 290 | break; 291 | default: 292 | fprintf(stderr, "incorrect ipc type: %d\n", w->opt->ipc_type); 293 | exit(1); 294 | break; 295 | } 296 | } 297 | 298 | static void worker_ping_pong_futex(struct task_data *w, struct task_data *m) 299 | { 300 | /* set myself to blocked */ 301 | w->ipc.futex = FUTEX_BLOCKED; 302 | 303 | /* let the main know */ 304 | fpost(&m->ipc.futex); 305 | 306 | /* 
/*
 * One pipe-based heartbeat round-trip from a worker: write our id on the
 * tx pipe so the main thread sees us, then block reading the rx pipe
 * until the main thread writes the pong back.
 */
static void worker_ping_pong_pipe(struct task_data *w, struct task_data *m)
{
	int wr_id = w->id;

	/* let the main know: the id tells main which worker to pong */
	ssize_t w_ret = write(w->ipc.pipe.tx.wfd, &wr_id, sizeof(wr_id));
	if (w_ret != sizeof(wr_id)) {
		perror("worker write failed");
		exit(1);
	}

	/*
	 * don't wait if the main thread is shutting down.  On the
	 * stopping path, main writes one last pong to every worker's rx
	 * pipe, so as long as main walks its worker list after setting
	 * stopping, we shouldn't block here forever.
	 */
	if (!stopping) {
		/* if main hasn't already ponged us, this read blocks */
		ssize_t r = read(w->ipc.pipe.rx.rfd, &wr_id, sizeof(wr_id));
		if (r != sizeof(wr_id)) {
			perror("worker read failed");
			exit(1);
		}
	}
}
/*
 * Pipe/epoll side of the main thread's heartbeat: answer every worker
 * ping collected by the previous epoll_wait, then wait for the next
 * batch of pings.
 *
 * Returns 1 when the benchmark is stopping (after ponging every worker
 * one last time so none of them blocks in read), 0 otherwise.
 */
static
int main_ping_pong_pipe(struct task_data *m, struct task_data *w, int nr_w)
{
	struct epoll_ipc *epoll = &m->ipc.epoll;
	int wr_id;

	/* unblock workers that pinged during the previous epoll_wait */
	for (int i = 0; i < epoll->nfds; i++) {
		/* read a ping message (the worker's id) from a worker */
		ssize_t r = read(epoll->events[i].data.fd, &wr_id, sizeof(wr_id));
		if (r != sizeof(wr_id)) {
			perror("read failed");
			exit(1);
		}

		/* send a pong message back to that worker's rx pipe */
		ssize_t w_ret = write(w[wr_id].ipc.pipe.rx.wfd, &wr_id, sizeof(wr_id));
		if (w_ret != sizeof(wr_id)) {
			perror("write failed");
			exit(1);
		}
	}

	/*
	 * On shutdown, pong every worker unconditionally so workers
	 * blocked in read() get released, then tell the caller to stop.
	 */
	if (stopping) {
		for (int i = 0; i < nr_w; i++) {
			wr_id = i;
			ssize_t w_ret = write(w[i].ipc.pipe.rx.wfd, &wr_id, sizeof(wr_id));
			if (w_ret != sizeof(wr_id)) {
				perror("write failed (stopping)");
				exit(1);
			}
		}
		return 1;
	}

	/*
	 * wait for the next ping from any worker; the 100ms timeout lets
	 * us re-check the stopping flag periodically
	 */
	while (!stopping) {
		m->ipc.epoll.nfds = epoll_wait(m->ipc.epoll.fd,
					       m->ipc.epoll.events,
					       MAX_EPOLL_EVENTS,
					       100);
		switch (m->ipc.epoll.nfds) {
		case 0: /* time out then retry */
			__sync_synchronize();
			break;
		case -1: /* error */
			perror("failed to epoll_wait\n");
			exit(1);
			return 1; /* NOTE(review): unreachable after exit(1) */
		default: /* got some messages; handled on the next call */
			return 0;
		}
	}

	return 0;
}
/*
 * Body of the main (hub) thread: create all worker threads and their
 * IPC endpoints, run the ping-pong/compute/sleep loop until the
 * stopping flag is raised, then join every worker.
 *
 * @arg: struct opt * with the parsed benchmark configuration.
 */
static void *main_thr(void *arg)
{
	struct opt *opt = arg;
	struct task_data *m = &opt->main;
	struct task_data *w = opt->workers;
	int nr_w = opt->nr_workers;
	int i;

	/* init main id */
	m->pid = gettid();
	m->id = MAIN_ID;

	/* launch workers; each gets its IPC set up before it starts */
	for (i = 0; i < opt->nr_workers; i++) {
		int ret;

		/* Create a worker thread. */
		w[i].id = i;
		worker_create_ipc(&w[i]);
		ret = pthread_create(&w[i].tid, NULL, worker_thr, &w[i]);
		if (ret) {
			fprintf(stderr, "error %d from pthread_create\n", ret);
			exit(1);
		}
	}

	/*
	 * init ipc for the main thread -- must happen after the workers'
	 * pipes exist, since the epoll set registers their tx read fds
	 */
	main_create_ipc(m);

	/* do its work */
	for (m->stat.cnt = 0; 1; m->stat.cnt++) {
		debug("main[%lx] = %llu\n", m->tid, m->stat.cnt);

		/* exchange a heartbeat signal; non-zero means stopping */
		if (main_ping_pong(m, w, nr_w))
			break;

		/* do some computation (emulated cache footprint) */
		do_work(m);

		/* sleep for a while to emulate the configured duty cycle */
		usleep(m->wait_time);
	}

	/* now, it's time to finish. wait for workers. */
	for (i = 0; i < opt->nr_workers; i++)
		pthread_join(w[i].tid, NULL);

	return NULL;
}
*/ 594 | ret = pthread_create(&m->tid, NULL, main_thr, opt); 595 | if (ret) { 596 | fprintf(stderr, "error %d from pthread_create\n", ret); 597 | exit(1); 598 | } 599 | } 600 | 601 | static void print_usage(void) 602 | { 603 | fprintf(stderr, "gbench usage:\n" 604 | "\t-i (--ipc): ipc type: futex, pipe, socket (def: futex)\n" 605 | "\t-s (--star): workers communicates only through a main threads\n" 606 | "\t specify 'r1:w1,r2:w2' where r1 and r2 are run time;\n" 607 | "\t w1 and w2 are wait time for each worker thread.\n" 608 | "\t time units are all usec.\n" 609 | "\t-t (--time): benchmark time in seconds (def: 60)\n" 610 | "\t-F (--cache_footprint): cache footprint (kb, def: 256)\n" 611 | ); 612 | exit(1); 613 | } 614 | 615 | static int get_nr_toks(char *s) 616 | { 617 | int nr = 0; 618 | 619 | for (; *s != '\0'; s++) { 620 | if (*s == ',') 621 | nr++; 622 | } 623 | return nr + 1; 624 | } 625 | 626 | static __u64 *alloc_data(struct task_data *t) 627 | { 628 | int matrix_size = get_matrix_size(t->opt); 629 | 630 | return malloc(3 * sizeof(__u64) * matrix_size * matrix_size); 631 | } 632 | 633 | static int parse_subopt_s(struct opt *opt, char *s) 634 | { 635 | struct task_data *m = &opt->main; 636 | struct task_data *w; 637 | int num = get_nr_toks(s); 638 | char *t; 639 | int i; 640 | 641 | /* parse run time and wait time for main */ 642 | t = strtok(s, ",:"); 643 | m->run_time = atol(t); 644 | t = strtok(NULL, ",:"); 645 | m->wait_time = atol(t); 646 | m->opt = opt; 647 | 648 | /* alloc workers array */ 649 | opt->nr_workers = num - 1; 650 | opt->workers = w = calloc(num - 1, sizeof(struct task_data)); 651 | if (!opt->workers) 652 | return -ENOMEM; 653 | for (i = 0; i < opt->nr_workers; i++) { 654 | w[i].opt = opt; 655 | w[i].data = alloc_data(&w[i]); 656 | if (!w[i].data) 657 | return -ENOMEM; 658 | } 659 | 660 | /* parse run time and wait time for each worker */ 661 | for (i = 0; i < opt->nr_workers && t; i++) { 662 | t = strtok(NULL, ",:"); 663 | w[i].run_time 
= atol(t); 664 | t = strtok(NULL, ",:"); 665 | w[i].wait_time = atol(t); 666 | } 667 | 668 | return 0; 669 | } 670 | 671 | static int parse_ipc_type(char *s) 672 | { 673 | for (int i = 0; i < IPC_MAX; i++) { 674 | if (strcmp(ipc_str[i], s) == 0) 675 | return i; 676 | } 677 | 678 | return -EINVAL; 679 | } 680 | 681 | static int parse_options(struct opt *opt, int argc, char **argv) 682 | { 683 | char *option_string = "i:s:t:F:h"; 684 | static struct option long_options[] = { 685 | {"ipc", required_argument, 0, 'i'}, 686 | {"time", required_argument, 0, 't'}, 687 | {"star", required_argument, 0, 's'}, 688 | {"cache_footprint", required_argument, 0, 'F'}, 689 | {"help", no_argument, 0, 'h'}, 690 | {0, 0, 0, 0} 691 | }; 692 | int c, ret;; 693 | 694 | /* init opt to default values */ 695 | memset(opt, 0, sizeof(*opt)); 696 | opt->ipc_type = IPC_FUTEX; 697 | opt->cache_footprint_kb = 256; 698 | opt->benchmark_time_sec = 60; 699 | 700 | /* parse options */ 701 | while (1) { 702 | int option_index = 0; 703 | 704 | c = getopt_long(argc, argv, option_string, 705 | long_options, &option_index); 706 | if (c == -1) 707 | break; 708 | 709 | switch(c) { 710 | case 'i': 711 | ret = parse_ipc_type(optarg); 712 | if (ret < 0 ) 713 | return ret; 714 | opt->ipc_type = ret; 715 | break; 716 | case 't': 717 | opt->benchmark_time_sec = atoi(optarg); 718 | break; 719 | case 's': 720 | ret = parse_subopt_s(opt, optarg); 721 | if (ret) 722 | return ret; 723 | break; 724 | case 'F': 725 | opt->cache_footprint_kb = atoi(optarg); 726 | break; 727 | default: 728 | print_usage(); 729 | break; 730 | } 731 | } 732 | 733 | /* further initialize the main */ 734 | opt->main.data = alloc_data(&opt->main); 735 | if (!opt->main.data) 736 | return -ENOMEM; 737 | 738 | /* sanity check */ 739 | if (opt->nr_workers < 1) 740 | print_usage(); 741 | 742 | return 0; 743 | } 744 | 745 | static void stop_benchmark(struct opt *opt) 746 | { 747 | struct task_data *m = &opt->main; 748 | 749 | /* full memory barrier 
*/ 750 | __sync_synchronize(); 751 | 752 | /* then update it atomically */ 753 | __sync_bool_compare_and_swap(&stopping, 0, 1); 754 | 755 | /* finally waiting for the termination of the main */ 756 | fpost(&m->ipc.futex); 757 | pthread_join(m->tid, NULL); 758 | } 759 | 760 | static void show_results(struct opt *opt) 761 | { 762 | struct task_data *m = &opt->main; 763 | struct task_data *w = opt->workers; 764 | int nr_w = opt->nr_workers; 765 | 766 | printf("# thread\t %10s %10s %10s %10s %10s %10s %10s\n", 767 | "run_t", "run_a", "run_f", "wait_t", "wait_a", "wait_f", "cnt"); 768 | printf("main-thr[%d]\t %10lld %10lld %10lld " 769 | "%10lld %10lld %10lld %10lld\n", 770 | m->pid, 771 | m->run_time, m->stat.avg_run_time, m->stat.frq_run_time, 772 | m->wait_time, m->stat.avg_wait_time, m->stat.frq_wait_time, 773 | m->stat.cnt); 774 | 775 | for (int i = 0; i < nr_w; i++) { 776 | printf("worker[%d]-%d\t %10lld %10lld %10lld " 777 | "%10lld %10lld %10lld %10lld\n", 778 | w[i].pid, i, w[i].run_time, w[i].stat.avg_run_time, 779 | w[i].stat.frq_run_time, w[i].wait_time, 780 | w[i].stat.avg_wait_time, w[i].stat.frq_wait_time, 781 | w[i].stat.cnt); 782 | } 783 | } 784 | 785 | static void init(void) 786 | { 787 | /* init base time for overflow-free time calculation */ 788 | gettimeofday(&base_time, NULL); 789 | } 790 | 791 | int main(int argc, char **argv) 792 | { 793 | struct opt opt; 794 | 795 | init(); 796 | parse_options(&opt, argc, argv); 797 | launch_main_thr(&opt); 798 | sleep(opt.benchmark_time_sec); 799 | stop_benchmark(&opt); 800 | show_results(&opt); 801 | 802 | return 0; 803 | } 804 | -------------------------------------------------------------------------------- /bin/report: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import glob 6 | import csv 7 | import datetime 8 | import argparse 9 | 10 | dbg_prt = False 11 | cur_dir = 
class stat_item:
    """One category of benchmark statistics plus its %-difference vs a baseline."""

    def __init__(self):
        # Information is stored in a list of tuples, each of which is a pair
        # of a property name (e.g., "median FPS") and its value (e.g., "60").
        self.stat = None

        # `diff` is a list of performance differences against a baseline in %.
        # It is a list of floats, in the same order as `stat`.
        self.diff = None

    def calc_diff(self, baseline):
        """Fill self.diff with the %-delta of each entry of self.stat over baseline.stat.

        Does nothing when either side has no data.  Comparing an item
        against itself yields all-zero deltas.
        """
        # sanity check (idiom fix: `is None` instead of `== None`)
        if self.stat is None or baseline.stat is None:
            return

        # self difference is always a 0% difference
        # (identity check made explicit with `is`; the class defines no
        # __eq__, so the original `==` was identity anyway)
        if self is baseline:
            self.diff = [0.0] * len(self.stat)
            return

        # calc delta over the baseline
        self.diff = []
        for (b, c) in zip(baseline.stat, self.stat):
            b_v = float(b[1])
            c_v = float(c[1])
            if b_v == c_v:
                d = 0.0
            else:
                # nudge an exactly-zero baseline to avoid division by zero
                if b_v == 0.0:
                    b_v = b_v + sys.float_info.min
                d = ((c_v - b_v) / b_v) * 100.0
            self.diff.append(d)

        # ROBUSTNESS: tolerant lookup of the module-level debug flag;
        # `global dbg_prt` + `if dbg_prt:` raised NameError when absent.
        if globals().get("dbg_prt", False):
            print(self.diff)
wakeup_lat (usec) 92 | # - min, 50.0th, 90.0th, 99.0th, 99.9th, max 93 | self.wakeup_lat = stat_item() 94 | # * req_lat (usec) 95 | # - min, 50.0th, 90.0th, 99.0th, 99.9th, max 96 | self.req_lat = stat_item() 97 | # * throughput (request per second) 98 | # - min, 20.0th, average, 50.0th, 90.0th, max 99 | self.rps= stat_item() 100 | 101 | # procinsight 102 | # ----------- 103 | # * CPU power state 104 | # - C0, poll, C1, c2, c3 105 | self.cpu_pwr = stat_item() 106 | # * Clock 107 | # - freq 108 | self.clock_freq = stat_item() 109 | # * Energy 110 | # - J, J/sec 111 | self.energy = stat_item() 112 | # * processor 113 | # - ipc, front-end stall (%), back-end stall (%), page faults 114 | self.processor = stat_item() 115 | # * scheduling 116 | # - context_switches, cpu-migrations, sched_wakeup 117 | self.sched = stat_item() 118 | 119 | def exec_insight(self, c, a): 120 | global cur_dir 121 | cmd = os.path.abspath(os.path.join(cur_dir, c)) + " " + a 122 | print_log("Running %s" % cmd) 123 | p = subprocess.Popen(cmd, shell=True, stdout=None, stderr=None) 124 | p.wait() 125 | return p 126 | 127 | def all_csv_exist(self, insight, props): 128 | for p in props: 129 | l = os.path.join(self.ldir, self.prefix + "-" + insight + "-" + p + ".csv") 130 | if os.path.isfile(l) == False: 131 | return False 132 | return True 133 | 134 | def build_sc_stat(self): 135 | # count the number of threads 136 | scm_logs = glob.glob(os.path.join(self.ldir, self.prefix + "-scmon.*")) 137 | self.sc_thr_nr = len(scm_logs) 138 | if self.sc_thr_nr == 0: 139 | return 140 | 141 | # if sc_log does not exists, generate it. 
    def build_data_list(self, insight, prop, keys):
        """Load `{prefix}-{insight}-{prop}.csv` and return [(key, value)] for `keys`.

        The first CSV row is skipped as a header; each remaining row is
        expected to be exactly two columns (name, value).
        NOTE(review): a row with a different column count raises
        ValueError during unpacking, and a requested key missing from
        the file raises KeyError -- confirm the insight tools guarantee
        both.
        """
        l = os.path.join(self.ldir, self.prefix +
                         "-" + insight + "-" + prop + ".csv")
        with open(l, "r") as f:
            rd = csv.reader(f)
            # skip the header row
            rd.__next__()
            data= {}
            for s, fps in rd:
                data[s.strip()] = fps.strip()
            # re-emit in the caller-requested key order
            dlist = []
            for k in keys:
                dlist.append( (k, data[k]) )
            global dbg_prt
            if dbg_prt:
                print(dlist)
            return dlist
"gpu_load", keys) 197 | self.ram_util.stat = self.build_data_list("ginsight", "ram_used", keys) 198 | 199 | def build_factorio_stat(self): 200 | flog = os.path.join(self.ldir, self.prefix + ".factorio_out") 201 | if os.path.isfile(flog) == False: 202 | return 203 | 204 | with open(flog, "r") as f: 205 | print_log("Loading %s" % flog) 206 | for line in f: 207 | line = line.strip() 208 | if line.startswith("avg:") == False: 209 | continue 210 | # avg: 76.110 ms, min: 70.599 ms, max: 233.318 ms 211 | dlist = [] 212 | for tok in line.split(","): 213 | k, v = tok.split(":") 214 | v = v[:-3].strip() 215 | dlist.append( (k, v) ) 216 | self.factorio.stat = dlist 217 | break 218 | global dbg_prt 219 | if dbg_prt: 220 | print(self.factoriot.stat) 221 | 222 | def build_schbench_stat(self): 223 | flog = os.path.join(self.ldir, self.prefix + ".schbench_out") 224 | if os.path.isfile(flog) == False: 225 | return 226 | 227 | class STAT: 228 | # state 229 | UNKNOWN = 0 230 | WAKEUP_LAT = 1 231 | REQ_LAT = 2 232 | RPS = 3 233 | 234 | # state transition map 235 | trans = [("Wakeup Latencies percentiles", WAKEUP_LAT), 236 | ("Request Latencies percentiles", REQ_LAT), 237 | ("RPS percentiles", RPS)] 238 | 239 | def __init__(self, sa): 240 | self.stat_app = sa 241 | self.s = STAT.UNKNOWN 242 | self.new_s = STAT.UNKNOWN 243 | self.dlist = [] 244 | 245 | def is_new_state(self, line): 246 | for prefix, new_s in self.trans: 247 | if line.startswith(prefix): 248 | self.new_s = new_s 249 | return True 250 | return False 251 | 252 | def trans_state(self): 253 | # wrarp up the current state 254 | if self.s == self.WAKEUP_LAT: 255 | self.stat_app.wakeup_lat.stat = self.dlist 256 | elif self.s == self.REQ_LAT: 257 | self.stat_app.req_lat.stat = self.dlist 258 | elif self.s == self.RPS: 259 | self.stat_app.rps.stat = self.dlist 260 | # prep for the new state 261 | self.dlist = [] 262 | self.s = self.new_s 263 | self.new_s = self.UNKNOWN 264 | 265 | s = STAT(self) 266 | with open(flog, "r") as f: 
267 | for line in f: 268 | # transitioning to a new state 269 | if s.is_new_state(line): 270 | s.trans_state() 271 | continue 272 | # continue on the old state 273 | # * clean up lines 274 | line = line.strip() 275 | if line.startswith("current rps:") or \ 276 | line.startswith("final rps goal was") or \ 277 | line.startswith("setting worker threads") or \ 278 | line.startswith("#") or line == "": \ 279 | continue 280 | if line.startswith("* "): 281 | line = line[2:].strip() 282 | if line.find("(") != -1: 283 | line = line[: line.find("(")] 284 | line = line.strip() 285 | # * parsing 286 | # - min=7403, max=7900 287 | if line.startswith("min"): 288 | mx_list = line.split(",") 289 | for mx in mx_list: 290 | kv = mx.split("=") 291 | s.dlist.append( (kv[0].strip(), kv[1].strip()) ) 292 | # - 20.0th: 7544 293 | # - average rps: 7582.38 294 | else: 295 | kv = line.split(":") 296 | s.dlist.append( (kv[0].strip(), kv[1].strip()) ) 297 | 298 | def build_proc_stat(self): 299 | # if there is a missing CSV, generate all CSVs 300 | props = ["cstate-sw", "energy-sw", "perf-sw", "sched-sw"] 301 | if self.force == True or self.all_csv_exist("procinsight", props) == False: 302 | arg = "-q -o {outdir} -l {prefix}".format( 303 | outdir = self.ldir, prefix = self.prefix) 304 | self.exec_insight("procinsight", arg) 305 | if self.all_csv_exist("procinsight", props) == False: 306 | return 307 | 308 | # * CPU power state 309 | # - C0, poll, C1, c2, c3 310 | keys = ["C0", "POLL", "C1", "C2", "C3"] 311 | self.cpu_pwr.stat = self.build_data_list("procinsight", "cstate-sw", keys) 312 | # * Clock 313 | # - freq 314 | keys = ["Freq"] 315 | self.clock_freq.stat = self.build_data_list("procinsight", "cstate-sw", keys) 316 | # * Energy 317 | # - J, J/sec 318 | keys= ["J", "J/sec"] 319 | self.energy.stat = self.build_data_list("procinsight", "energy-sw", keys) 320 | # * processor 321 | # - ipc, front-end stall (%), back-end stall (%), page faults 322 | keys = ["ipc", "frontend-stall (%)", 
"backend-stall (%)", "page-faults"] 323 | self.processor.stat = self.build_data_list("procinsight", "perf-sw", keys) 324 | # * scheduling 325 | # - context_switches, cpu-migrations, sched_wakeup 326 | keys = ["context-switches", "cpu-migrations"] 327 | self.sched.stat = self.build_data_list("procinsight", "perf-sw", keys) 328 | keys = ["sched_wakeup"] 329 | self.sched.stat = self.sched.stat + \ 330 | self.build_data_list("procinsight", "sched-sw", keys) 331 | 332 | def build_stat(self): 333 | self.build_sc_stat() 334 | self.build_factorio_stat() 335 | self.build_schbench_stat() 336 | self.build_g_stat() 337 | self.build_proc_stat() 338 | 339 | def calc_diff(self, baseline): 340 | self.factorio.calc_diff(baseline.factorio) 341 | self.wakeup_lat.calc_diff(baseline.wakeup_lat) 342 | self.req_lat.calc_diff(baseline.req_lat) 343 | self.rps.calc_diff(baseline.rps) 344 | self.fps.calc_diff(baseline.fps) 345 | self.cpu_util.calc_diff(baseline.cpu_util) 346 | self.gpu_util.calc_diff(baseline.gpu_util) 347 | self.ram_util.calc_diff(baseline.ram_util) 348 | self.cpu_pwr.calc_diff(baseline.cpu_pwr) 349 | self.clock_freq.calc_diff(baseline.clock_freq) 350 | self.energy.calc_diff(baseline.energy) 351 | self.processor.calc_diff(baseline.processor) 352 | self.sched.calc_diff(baseline.sched) 353 | 354 | def build_app_stats(args): 355 | # buid app stat for each log directory 356 | app_stats = [] 357 | for logdir in args.logdir: 358 | stat = stat_app(logdir, args.prefix, args.force) 359 | stat.build_stat() 360 | app_stats.append(stat) 361 | 362 | # calculate diff in % 363 | baseline = app_stats[0] 364 | for comp in app_stats: 365 | comp.calc_diff(baseline) 366 | 367 | return app_stats 368 | 369 | def get_res_path(ldir, fname): 370 | global out_dir 371 | afil = os.path.join(ldir, fname) 372 | rfil = os.path.relpath(afil, start = out_dir) 373 | return afil, rfil 374 | 375 | def gen_md_tbl(stat_app, suffix, tuples, header, f): 376 | nick = stat_app.nick 377 | afil, rfil = 
get_res_path(stat_app.ldir, stat_app.prefix + suffix) 378 | 379 | # generate header row 380 | if header: 381 | l1, l2 = "| |", "| ---- |" 382 | for t in tuples: 383 | l1 = l1 + " " + t[0] + " | " 384 | l2 = l2 + " ---: | " 385 | print(l1, file = f) 386 | print(l2, file = f) 387 | # generate data 388 | if os.path.isfile(afil): 389 | l = "| [**" + nick + "**](" + rfil + ") |" 390 | else: 391 | l = "| **" + nick + "** |" 392 | for t in tuples: 393 | l = l + " " + t[1] + " | " 394 | print(l, file = f) 395 | 396 | def get_style(d): 397 | if d == 0: 398 | return "" 399 | if d > 0.0: 400 | return "**" 401 | return "*" 402 | 403 | def gen_md_diff_tbl(stat_app, suffix, st_item, baseline, f): 404 | nick = stat_app.nick 405 | afil, rfil = get_res_path(stat_app.ldir, stat_app.prefix + suffix) 406 | 407 | # generate header row for baseline 408 | if baseline: 409 | l1, l2 = "| |", "| ---- |" 410 | for s in st_item.stat: 411 | l1 = l1 + " " + s[0] + " | " 412 | l2 = l2 + " ---: | " 413 | print(l1, file = f) 414 | print(l2, file = f) 415 | # generate data 416 | if os.path.isfile(afil): 417 | l = "| [**" + nick + "**](" + rfil + ") |" 418 | else: 419 | l = "| **" + nick + "** |" 420 | for (s, d) in zip(st_item.stat, st_item.diff): 421 | style = get_style(d) 422 | l = l + " " + style + s[1] 423 | if baseline == False: 424 | l = l + " (%.4f" % d + "%)" 425 | l = l + style + " | " 426 | print(l, file = f) 427 | 428 | def gen_report_config(args, app_stats, f): 429 | # title = prefix 430 | print("``` {=html}", file = f) 431 | print("", file = f) 434 | print("```", file = f) 435 | 436 | print("---", file = f) 437 | print("title: %s" % args.prefix, file = f) 438 | print("date: %s" % datetime.datetime.now(), file = f) 439 | print("---", file = f) 440 | print("\n\n", file = f) 441 | 442 | # #### Comparisons 443 | print("### Comparisons\n", file = f) 444 | for s in app_stats: 445 | print("- **%s**: %s" % (s.nick, s.ldir), file = f) 446 | print("\n\n", file = f) 447 | 448 | def 
gen_report_syscall(args, app_stats, f): 449 | for s in app_stats: 450 | if s.sc_thr_nr == 0: 451 | continue 452 | print("### System call\n", file = f) 453 | print("- Number of threads involved: %d\n" % s.sc_thr_nr, file = f) 454 | print("- Top 10 system calls (%)\n", file = f) 455 | gen_md_tbl(s, "-scinsight-stat.svg", s.sc_top10, True, f) 456 | print("\n\n", file = f) 457 | 458 | def gen_report_factorio(args, app_stats, f): 459 | s = app_stats[0] 460 | if s.factorio.stat == None: 461 | return 462 | print("### Map update time in msec\n", file = f) 463 | gen_md_diff_tbl(s, ".factorio_out", s.factorio, True, f) 464 | for s in app_stats[1:]: 465 | if s.factorio== None: 466 | continue 467 | gen_md_diff_tbl(s, ".factorio_out", s.factorio, False, f) 468 | print("\n\n", file = f) 469 | 470 | def gen_report_schbench_rps(args, app_stats, f): 471 | s = app_stats[0] 472 | if s.rps.stat == None: 473 | return 474 | print("### Request per second (RPS)\n", file = f) 475 | gen_md_diff_tbl(s, ".schbench_out", s.rps, True, f) 476 | for s in app_stats[1:]: 477 | if s.rps == None: 478 | continue 479 | gen_md_diff_tbl(s, ".schbench_out", s.rps, False, f) 480 | print("\n\n", file = f) 481 | 482 | def gen_report_schbench_req_lat(args, app_stats, f): 483 | s = app_stats[0] 484 | if s.req_lat.stat == None: 485 | return 486 | print("### Request latencies (usec)\n", file = f) 487 | gen_md_diff_tbl(s, ".schbench_out", s.req_lat, True, f) 488 | for s in app_stats[1:]: 489 | if s.req_lat == None: 490 | continue 491 | gen_md_diff_tbl(s, ".schbench_out", s.req_lat, False, f) 492 | print("\n\n", file = f) 493 | 494 | def gen_report_schbench_wakeup_lat(args, app_stats, f): 495 | s = app_stats[0] 496 | if s.wakeup_lat.stat == None: 497 | return 498 | print("### Wakeup latencies (usec)\n", file = f) 499 | gen_md_diff_tbl(s, ".schbench_out", s.wakeup_lat, True, f) 500 | for s in app_stats[1:]: 501 | if s.wakeup_lat == None: 502 | continue 503 | gen_md_diff_tbl(s, ".schbench_out", s.wakeup_lat, False, 
f) 504 | print("\n\n", file = f) 505 | 506 | def gen_report_schbench(args, app_stats, f): 507 | gen_report_schbench_rps(args, app_stats, f) 508 | gen_report_schbench_req_lat(args, app_stats, f) 509 | gen_report_schbench_wakeup_lat(args, app_stats, f) 510 | 511 | def gen_report_fps(args, app_stats, f): 512 | s = app_stats[0] 513 | if s.fps.stat == None: 514 | return 515 | print("### FPS\n", file = f) 516 | gen_md_diff_tbl(s, "-ginsight-fps.svg", s.fps, True, f) 517 | for s in app_stats[1:]: 518 | if s.fps == None: 519 | continue 520 | gen_md_diff_tbl(s, "-ginsight-fps.svg", s.fps, False, f) 521 | print("\n\n", file = f) 522 | 523 | 524 | def gen_report_cpu_util(args, app_stats, f): 525 | s = app_stats[0] 526 | if s.cpu_util.stat == None: 527 | return 528 | print("### CPU utilization\n", file = f) 529 | gen_md_diff_tbl(s, "-ginsight-cpu_load.svg", s.cpu_util, True, f) 530 | for s in app_stats[1:]: 531 | if s.cpu_util == None: 532 | continue 533 | gen_md_diff_tbl(s, "-ginsight-cpu_load.svg", s.cpu_util, False, f) 534 | print("\n\n", file = f) 535 | 536 | def gen_report_gpu_util(args, app_stats, f): 537 | s = app_stats[0] 538 | if s.gpu_util.stat == None: 539 | return 540 | print("### GPU utilization\n", file = f) 541 | gen_md_diff_tbl(s, "-ginsight-gpu_load.svg", s.gpu_util, True, f) 542 | for s in app_stats[1:]: 543 | if s.gpu_util == None: 544 | continue 545 | gen_md_diff_tbl(s, "-ginsight-gpu_load.svg", s.gpu_util, False, f) 546 | print("\n\n", file = f) 547 | 548 | def gen_report_ginsight_overview(args, app_stats, f): 549 | # print header 550 | # | | conf1 | conf2 | conf3 | 551 | # | ---- | ----- | ----- | ----- | 552 | print("### Performance overview\n", file = f) 553 | l1, l2 = "| |", "| :----: |" 554 | for s in app_stats: 555 | l1 = l1 + " " + s.nick + " | " 556 | l2 = l2 + " :---: | " 557 | print(l1, file = f) 558 | print(l2, file = f) 559 | 560 | # print graphs 561 | # | FPS | img1 | img2 | img3 | 562 | # | cpu | img1 | img2 | img3 | 563 | # | gpu | img1 | 
img2 | img3 | 564 | # | ram | img1 | img2 | img3 | 565 | class report_conf: 566 | name = "" 567 | suffix = "" 568 | def __init__(self, n, s): 569 | self.name = n 570 | self.suffix = s 571 | 572 | confs = [report_conf("FPS", "-ginsight-fps.svg"), 573 | report_conf("CPU", "-ginsight-cpu_load.svg"), 574 | report_conf("GPU", "-ginsight-gpu_load.svg"), 575 | report_conf("RAM", "-ginsight-ram_used.svg"),] 576 | for c in confs: 577 | l1 = "| " + c.name + " |" 578 | for s in app_stats: 579 | afil, rfil = get_res_path(s.ldir, s.prefix + c.suffix) 580 | img = "![](" + rfil + ")" 581 | l1 = l1 + " " + img + " | " 582 | print(l1, file = f) 583 | print("\n\n", file = f) 584 | 585 | def gen_report_ram_util(args, app_stats, f): 586 | s = app_stats[0] 587 | if s.ram_util.stat == None: 588 | return 589 | print("### RAM usage\n", file = f) 590 | gen_md_diff_tbl(s, "-ginsight-ram_used.svg", s.ram_util, True, f) 591 | for s in app_stats[1:]: 592 | if s.ram_util == None: 593 | continue 594 | gen_md_diff_tbl(s, "-ginsight-ram_used.svg", s.ram_util, False, f) 595 | print("\n\n", file = f) 596 | 597 | def gen_report_cpu_pwr(args, app_stats, f): 598 | s = app_stats[0] 599 | if s.cpu_pwr.stat == None: 600 | return 601 | print("### CPU power state\n", file = f) 602 | gen_md_diff_tbl(s, "-procinsight-cstate-core.csv", s.cpu_pwr, True, f) 603 | for s in app_stats[1:]: 604 | if s.cpu_pwr == None: 605 | continue 606 | gen_md_diff_tbl(s, "-procinsight-cstate-core.csv", s.cpu_pwr, False, f) 607 | print("\n\n", file = f) 608 | 609 | def gen_report_clock_freq(args, app_stats, f): 610 | s = app_stats[0] 611 | if s.clock_freq.stat == None: 612 | return 613 | print("### Clock frequency\n", file = f) 614 | gen_md_diff_tbl(s, "-procinsight-cstate-core.csv", s.clock_freq, True, f) 615 | for s in app_stats[1:]: 616 | if s.clock_freq == None: 617 | continue 618 | gen_md_diff_tbl(s, "-procinsight-cstate-core.csv", s.clock_freq, False, f) 619 | print("\n\n", file = f) 620 | 621 | def gen_report_energy(args, 
app_stats, f): 622 | s = app_stats[0] 623 | if s.energy.stat == None: 624 | return 625 | print("### Energy consumption \n", file = f) 626 | gen_md_diff_tbl(s, "-procmon-energy__.log", s.energy, True, f) 627 | for s in app_stats[1:]: 628 | if s.energy == None: 629 | continue 630 | gen_md_diff_tbl(s, "-procmon-energy__.log", s.energy, False, f) 631 | print("\n\n", file = f) 632 | 633 | def gen_report_processor(args, app_stats, f): 634 | s = app_stats[0] 635 | if s.processor.stat == None: 636 | return 637 | print("### Processor state\n", file = f) 638 | gen_md_diff_tbl(s, "-procinsight-perf-sw.csv", s.processor, True, f) 639 | for s in app_stats[1:]: 640 | if s.processor == None: 641 | continue 642 | gen_md_diff_tbl(s, "-procinsight-perf-sw.csv", s.processor, False, f) 643 | print("\n\n", file = f) 644 | 645 | def gen_report_sched(args, app_stats, f): 646 | s = app_stats[0] 647 | if s.sched.stat == None: 648 | return 649 | print("### Scheduling state\n", file = f) 650 | gen_md_diff_tbl(s, "-procinsight-sched-core.csv", s.sched, True, f) 651 | for s in app_stats[1:]: 652 | if s.sched == None: 653 | continue 654 | gen_md_diff_tbl(s, "-procinsight-sched-core.csv", s.sched, False, f) 655 | print("\n\n", file = f) 656 | 657 | def gen_report(args, app_stats): 658 | # generate a report in markdown format 659 | file_md = args.output + ".md" 660 | with open(file_md, "w") as f: 661 | gen_report_config(args, app_stats, f) 662 | gen_report_syscall(args, app_stats, f) 663 | gen_report_factorio(args, app_stats, f) 664 | gen_report_schbench(args, app_stats, f) 665 | gen_report_ginsight_overview(args, app_stats, f) 666 | gen_report_fps(args, app_stats, f) 667 | gen_report_sched(args, app_stats, f) 668 | gen_report_cpu_util(args, app_stats, f) 669 | gen_report_gpu_util(args, app_stats, f) 670 | gen_report_ram_util(args, app_stats, f) 671 | gen_report_cpu_pwr(args, app_stats, f) 672 | gen_report_clock_freq(args, app_stats, f) 673 | gen_report_energy(args, app_stats, f) 674 | 
gen_report_processor(args, app_stats, f) 675 | 676 | # convert the markdown report to html 677 | file_html = args.output + ".html" 678 | global cur_dir 679 | cmd = "pandoc --standalone --toc %s -o %s" % (file_md, file_html) 680 | print_log("Running %s" % cmd) 681 | p = subprocess.Popen(cmd, shell=True, stdout=None, stderr=None) 682 | p.wait() 683 | 684 | def get_cmd_options(argv): 685 | parser = argparse.ArgumentParser( 686 | prog = "report", 687 | description = "Generate a report of given log directories", 688 | epilog = """ 689 | For example, `report -l base_dir -l cmp_dir -p game1 -o report.md` compares `game1` logs in two directoreis -- `base_dir` and `cmp_dir` -- and generates `report.md`. `base_dir` is used in calculating the relative difference. When only one log directory is given, only the summary of results without comparison is provided. It expects certain file extensions: `*.factorio_out` for factorio benchmark and `*.schbench_out` for schbench benchmark. 690 | """) 691 | 692 | parser.add_argument('-l', '--logdir', action='append', required=True, 693 | help='a log directory. 
When mulltiple `-l` options ' \ 694 | 'are given, comparison will be reported using ' \ 695 | 'the first one as a baseline.') 696 | parser.add_argument('-p', '--prefix', action='store', required=True, 697 | help='log file prefix for report generation') 698 | parser.add_argument('-o', '--output', action='store', required=True, 699 | help='target report file name in markdown format') 700 | parser.add_argument('-f', '--force', action='store_true', 701 | help='force to regenerate all CSV files') 702 | parser.add_argument('-g', '--debug', action='store_true', 703 | help='print out debug messages') 704 | args = parser.parse_args(argv) 705 | global dbg_prt 706 | dbg_prt = args.debug 707 | global out_dir 708 | out_dir = os.path.dirname(args.output) 709 | return args 710 | 711 | if __name__ == "__main__": 712 | args = get_cmd_options(sys.argv[1:]) 713 | app_stats = build_app_stats(args) 714 | gen_report(args, app_stats) 715 | 716 | --------------------------------------------------------------------------------