├── micro-bench ├── .gitignore └── gbench │ ├── Makefile │ └── gbench.c ├── Makefile ├── .gitignore ├── bin ├── sched-config ├── schedmon ├── energyprof ├── procmon ├── scmon ├── scinsight ├── mbench ├── energyinsight ├── ginsight ├── procinsight └── report ├── config └── MangoHud.conf ├── README.md └── LICENSE /micro-bench/.gitignore: -------------------------------------------------------------------------------- 1 | /schbench 2 | /gbench/gbench 3 | 4 | -------------------------------------------------------------------------------- /micro-bench/gbench/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -O1 -g -W -Wno-unused-parameter 3 | ALL_CFLAGS = $(CFLAGS) -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 4 | 5 | PROGS = gbench 6 | ALL = $(PROGS) 7 | 8 | $(PROGS): | depend 9 | 10 | all: $(ALL) 11 | 12 | %.o: %.c 13 | $(CC) -o $*.o -c $(ALL_CFLAGS) $< 14 | 15 | gbench: gbench.o 16 | $(CC) $(ALL_CFLAGS) -o $@ $(filter %.o,$^) -lpthread -lm 17 | 18 | depend: 19 | @$(CC) -MM $(ALL_CFLAGS) *.c 1> .depend 20 | 21 | clean: 22 | -rm -f *.o $(PROGS) .depend 23 | 24 | ifneq ($(wildcard .depend),) 25 | include .depend 26 | endif 27 | 28 | 29 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MK_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) 2 | INSTALL_DIR := $(MK_DIR)/bin 3 | 4 | all: schbench gbench 5 | 6 | micro-bench/schbench/schbench.c: 7 | (cd $(MK_DIR)/micro-bench/; git clone --depth 1 https://kernel.googlesource.com/pub/scm/linux/kernel/git/mason/schbench) 8 | 9 | schbench: micro-bench/schbench/schbench.c 10 | (cd $(MK_DIR)/micro-bench/schbench && make) 11 | cp $$(find $(MK_DIR)/micro-bench/schbench -type f -executable -print | grep -v '\.git/') $(INSTALL_DIR) 12 | 13 | gbench: micro-bench/gbench/gbench.c 14 | (cd $(MK_DIR)/micro-bench/gbench && make) 
15 | cp $$(find $(MK_DIR)/micro-bench/gbench -type f -executable -print) $(INSTALL_DIR) 16 | 17 | clean: 18 | (cd $(MK_DIR)/micro-bench/gbench && make clean) 19 | (cd $(MK_DIR)/micro-bench/schbench && make clean && rm -rf $(MK_DIR)/micro-bench/schbench) 20 | 21 | .PHONY: all schbench gbench clean 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bin/schbench 2 | /bin/gbench 3 | /bin/schedstat.py 4 | /logs* 5 | .depend 6 | *.o 7 | 8 | # http://www.gnu.org/software/automake 9 | 10 | Makefile.in 11 | /ar-lib 12 | /mdate-sh 13 | /py-compile 14 | /test-driver 15 | /ylwrap 16 | .deps/ 17 | .dirstamp 18 | 19 | # http://www.gnu.org/software/autoconf 20 | 21 | autom4te.cache 22 | /autoscan.log 23 | /autoscan-*.log 24 | /aclocal.m4 25 | /compile 26 | /config.cache 27 | /config.guess 28 | /config.h.in 29 | /config.log 30 | /config.status 31 | /config.sub 32 | /configure 33 | /configure.scan 34 | /depcomp 35 | /install-sh 36 | /missing 37 | /stamp-h1 38 | 39 | # https://www.gnu.org/software/libtool/ 40 | 41 | /ltmain.sh 42 | 43 | # http://www.gnu.org/software/texinfo 44 | 45 | /texinfo.tex 46 | 47 | # http://www.gnu.org/software/m4/ 48 | 49 | m4/libtool.m4 50 | m4/ltoptions.m4 51 | m4/ltsugar.m4 52 | m4/ltversion.m4 53 | m4/lt~obsolete.m4 54 | 55 | # Generated Makefile 56 | # (meta build system like autotools, 57 | # can automatically generate from config.status script 58 | # (which is called by configure script)) 59 | Makefile 60 | -------------------------------------------------------------------------------- /bin/sched-config: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import csv 4 | import argparse 5 | 6 | def sched_config_set(config_csv): 7 | with open(config_csv, "r") as cf: 8 | rd = csv.reader(cf) 9 | rd.__next__() 10 | for (k, v) in rd: 11 | k = 
k.strip() 12 | v = v.strip() 13 | with open(k, "w") as kf: 14 | print(v, file = kf) 15 | 16 | def sched_config_get(config_csv): 17 | params = ["/proc/sys/kernel/sched_cfs_bandwidth_slice_us", 18 | "/sys/kernel/debug/sched/latency_ns", 19 | "/sys/kernel/debug/sched/min_granularity_ns", 20 | "/sys/kernel/debug/sched/wakeup_granularity_ns", 21 | "/sys/kernel/debug/sched/migration_cost_ns", 22 | "/sys/kernel/debug/sched/nr_migrate"] 23 | 24 | with open(config_csv, "w") as cf: 25 | print("%s, %s" % ("{0:^50}".format("attribute"), 26 | "{0:^20}".format("value")), file = cf) 27 | for p in params: 28 | with open(p, "r") as f: 29 | l = f.readline() 30 | v = l.strip() 31 | print("%s, %s" % ("{0:<50}".format(p), 32 | "{0:>20}".format(v)), file = cf) 33 | 34 | def sched_config(args): 35 | if args.get != None: 36 | sched_config_get(args.get) 37 | elif args.set != None: 38 | sched_config_set(args.set) 39 | 40 | def get_cmd_options(argv): 41 | parser = argparse.ArgumentParser( 42 | prog = "sched-config", 43 | description = "Set or get the scheduler config parameters") 44 | parser.add_argument('-g', '--get', action='store', 45 | help='Get the scheculer parameters') 46 | parser.add_argument('-s', '--set', action='store', 47 | help='Set the scheculer parameters') 48 | args = parser.parse_args(argv) 49 | 50 | # sanity check of arguments 51 | nopts = (0 if args.get == None else 1) + \ 52 | (0 if args.set == None else 1) 53 | if nopts != 1: 54 | parser.print_help() 55 | print("sched-config: error: either '-g' or '-s' should be specified", 56 | file = sys.stderr) 57 | exit(1) 58 | return args 59 | 60 | 61 | if __name__ == "__main__": 62 | args = get_cmd_options(sys.argv[1:]) 63 | sched_config(args) 64 | 65 | """ 66 | attribute , value 67 | /proc/sys/kernel/sched_cfs_bandwidth_slice_us , 3000 68 | /sys/kernel/debug/sched/latency_ns , 3000000 69 | /sys/kernel/debug/sched/min_granularity_ns , 300000 70 | /sys/kernel/debug/sched/wakeup_granularity_ns , 500000 71 | 
/sys/kernel/debug/sched/migration_cost_ns , 50000 72 | /sys/kernel/debug/sched/nr_migrate , 128 73 | """ 74 | -------------------------------------------------------------------------------- /bin/schedmon: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | mon_procs = [] 11 | 12 | def ignore_term_signals(): 13 | term_signals = (signal.SIGTERM, signal.SIGINT, signal.SIGABRT, 14 | signal.SIGBUS, signal.SIGILL, signal.SIGSEGV, 15 | signal.SIGHUP) 16 | for s in term_signals: 17 | # do nothing upon kill signals for graceful exit 18 | signal.signal(s, lambda signum, frame: None) 19 | 20 | def get_log_name(args, kind): 21 | log = os.path.join(args.outdir, 22 | args.log + "-schedmon-" + kind+ "__.log") 23 | return log 24 | 25 | def run_schedmon(args): 26 | # prep for gracefil termination 27 | ignore_term_signals() 28 | 29 | # prep for logging 30 | subprocess.Popen("mkdir -p " + args.outdir, 31 | shell=True, stdout=None, stderr=None).wait() 32 | outdir = args.outdir 33 | 34 | # launch a background monitor 35 | log = get_log_name(args, "raw") 36 | sh_cmd = "perf sched record -ag -o %s sleep 36500d" % log 37 | p = subprocess.Popen(sh_cmd, shell=True) 38 | mon_procs.append(p) 39 | 40 | def wait_for_schedmon(args): 41 | # wait for the background processes 42 | for p in mon_procs: 43 | p.wait() 44 | 45 | # prep comand line for report generation 46 | sh_cmds = [] 47 | raw_log = get_log_name(args, "raw") 48 | # - latency 49 | log = get_log_name(args, "latency") 50 | sh_cmd = "perf sched latency -i %s > %s" % (raw_log, log) 51 | sh_cmds.append(sh_cmd) 52 | # - map 53 | log = get_log_name(args, "map") 54 | sh_cmd = "perf sched map -i %s > %s" % (raw_log, log) 55 | sh_cmds.append(sh_cmd) 56 | # - timehist (full) 57 | log = get_log_name(args, "timehist_full") 58 | sh_cmd = "perf sched timehist 
-SMVwng -i %s > %s" % (raw_log, log) 59 | sh_cmds.append(sh_cmd) 60 | # - timehist (short) 61 | # log = get_log_name(args, "timehist_short") 62 | # sh_cmd = "perf sched timehist -Sg -i %s > %s" % (raw_log, log) 63 | # sh_cmds.append(sh_cmd) 64 | 65 | # launch background monitors 66 | for sh_cmd in sh_cmds: 67 | p = subprocess.Popen(sh_cmd, shell=True) 68 | mon_procs.append(p) 69 | p.wait() 70 | 71 | 72 | def get_cmd_options(argv): 73 | parser = argparse.ArgumentParser( 74 | prog = "schedmon", 75 | description = "Collect the detailed scheduler activity internally using `perf sched`") 76 | parser.add_argument('-o', '--outdir', action='store', required=True, 77 | help='output directory') 78 | parser.add_argument('-l', '--log', action='store', required=True, 79 | help='log file prefix') 80 | args = parser.parse_args(argv) 81 | 82 | return args 83 | 84 | if __name__ == "__main__": 85 | args = get_cmd_options(sys.argv[1:]) 86 | run_schedmon(args) 87 | wait_for_schedmon(args) 88 | 89 | -------------------------------------------------------------------------------- /bin/energyprof: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | bg_proc = None 11 | 12 | def ignore_term_signals(): 13 | term_signals = (signal.SIGTERM, signal.SIGINT, signal.SIGABRT, 14 | signal.SIGBUS, signal.SIGILL, signal.SIGSEGV, 15 | signal.SIGHUP) 16 | for s in term_signals: 17 | # do nothing upon kill signals for graceful exit 18 | signal.signal(s, lambda signum, frame: None) 19 | 20 | def get_log_name(args): 21 | log = os.path.join(args.outdir, args.log + "-energyprof__.log") 22 | return log 23 | 24 | def get_cpu_configs(args): 25 | configs = {8: ["0", "0-1", "0,4", "0-3", "0,1,4,5", "0-7"], 26 | 16: ["0", "0-1", "0,8", "0-3", "0,1,7,8", "0-7", "0-3,8-11", "0-15"], } 27 | return configs[args.num_cpus] 28 | 29 | 
def chcpu(args, config, f): 30 | subprocess.Popen("chcpu -d 1-" + str(args.num_cpus), shell=True, stdout=f, stderr=f).wait() 31 | subprocess.Popen("chcpu -e " + config, shell=True, stdout=f, stderr=f).wait() 32 | subprocess.Popen("sleep 1", shell=True).wait() 33 | 34 | def print_config(args, config, load, f): 35 | fds = [f, sys.__stdout__] 36 | for fd in fds: 37 | print("## cpu=" + config + ", load=" + str(load) + 38 | ",util=" + str(load * args.num_cpus), file=fd) 39 | 40 | def run_ubench(args, config, load, f): 41 | cmd = "perf stat -a --per-socket -e power/energy-pkg/ " + \ 42 | "stress-ng --change-cpu --no-rand-seed " + \ 43 | "--taskset " + config + " --all " + str(args.num_cpus) + \ 44 | " --cpu " + str(args.num_cpus) + " --cpu-method all " + \ 45 | " --cpu-load " + str(load) + " --cpu-load-slice 3 " + \ 46 | " --metrics -t " + str(args.time_sec) 47 | subprocess.Popen(cmd, shell=True, stdout=f, stderr=f).wait() 48 | 49 | def run_energyprof(args): 50 | # prep for gracefil termination 51 | ignore_term_signals() 52 | 53 | # prep for logging 54 | subprocess.Popen("mkdir -p " + args.outdir, 55 | shell=True, stdout=None, stderr=None).wait() 56 | outdir = args.outdir 57 | log = get_log_name(args) 58 | with open(log, 'w') as f: 59 | # turn on turbostat as a background process 60 | global bg_proc 61 | bg_proc = subprocess.Popen( 62 | "taskset -c 0 turbostat --header_iterations 1 -S", 63 | shell=True, stdout=f, stderr=f) 64 | subprocess.Popen("sleep 5", shell=True).wait() 65 | 66 | # for each CPU set 67 | for config in get_cpu_configs(args): 68 | # make only target CPUs online 69 | chcpu(args, config, f) 70 | 71 | # for low load settings 72 | for load in range(1, 7): 73 | print_config(args, config, load, f) 74 | run_ubench(args, config, load, f) 75 | 76 | # for high load settings 77 | for load in range(12, 101, 6): 78 | print_config(args, config, load, f) 79 | run_ubench(args, config, load, f) 80 | 81 | wait_for_energyprof(args, f) 82 | 83 | 84 | def 
wait_for_energyprof(args, f): 85 | # stop the turbostat 86 | subprocess.Popen("pkill turbostat", shell=True).wait() 87 | global bg_proc 88 | bg_proc.wait() 89 | 90 | # activate all CPUs 91 | chcpu(args, "0-" + str(args.num_cpus - 1), f) 92 | 93 | def get_cmd_options(argv): 94 | parser = argparse.ArgumentParser( 95 | prog = "energyprof", 96 | description = "Collect energy and performance statistics according to CPU load", 97 | epilog = "energyprof internally uses 'turbostat', 'stress-ng', 'chcpu', 'taskset', and 'perf'.") 98 | parser.add_argument('-c', '--num_cpus', action='store', type=int, 99 | required=True, help='number of CPUs of this machine') 100 | parser.add_argument('-t', '--time_sec', action='store', type=int, 101 | default=120, help='time in sec to run stress-_ng') 102 | parser.add_argument('-o', '--outdir', action='store', required=True, 103 | help='output directory') 104 | parser.add_argument('-l', '--log', action='store', required=True, 105 | help='log file prefix') 106 | 107 | args = parser.parse_args(argv) 108 | return args 109 | 110 | if __name__ == "__main__": 111 | args = get_cmd_options(sys.argv[1:]) 112 | run_energyprof(args) 113 | -------------------------------------------------------------------------------- /bin/procmon: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | mon_procs = [] 11 | 12 | def ignore_term_signals(): 13 | term_signals = (signal.SIGTERM, signal.SIGINT, signal.SIGABRT, 14 | signal.SIGBUS, signal.SIGILL, signal.SIGSEGV, 15 | signal.SIGHUP) 16 | for s in term_signals: 17 | # do nothing upon kill signals for graceful exit 18 | signal.signal(s, lambda signum, frame: None) 19 | 20 | def get_log_name(args, mon): 21 | log = os.path.join(args.outdir, 22 | args.log + "-procmon-" + mon + "__.log") 23 | return log 24 | 25 | def run_procmons(args): 
26 | # prep for gracefil termination 27 | ignore_term_signals() 28 | 29 | # prep for logging 30 | subprocess.Popen("mkdir -p " + args.outdir, 31 | shell=True, stdout=None, stderr=None).wait() 32 | outdir = args.outdir 33 | 34 | # prep comand line for each background monitor 35 | sh_cmds = [] 36 | # -s, --sched 37 | if args.sched: 38 | log = get_log_name(args, "sched") 39 | dat = log + ".dat" 40 | sh_cmd = "trace-cmd record -e sched_wakeup -o %s > /dev/null" % dat 41 | sh_cmds.append(sh_cmd) 42 | # -c, --cstate 43 | if args.cstate: 44 | log = get_log_name(args, "cstate") 45 | sh_cmd = "cpupower monitor sleep 36500d > " + log 46 | sh_cmds.append(sh_cmd) 47 | # -e, --energy 48 | if args.energy: 49 | log = get_log_name(args, "energy") 50 | sh_cmd = "perf stat -a --per-socket -e power/energy-pkg/ 2>" + log 51 | sh_cmds.append(sh_cmd) 52 | # -p, --perf 53 | if args.perf: 54 | log = get_log_name(args, "perf") 55 | sh_cmd = "perf stat -a 2>" + log 56 | sh_cmds.append(sh_cmd) 57 | 58 | # launch background monitors 59 | for sh_cmd in sh_cmds: 60 | p = subprocess.Popen(sh_cmd, shell=True) 61 | mon_procs.append(p) 62 | 63 | def wait_for_procmons(args): 64 | # wait for the background processes 65 | for p in mon_procs: 66 | p.wait() 67 | 68 | # if '--sched' is on, generates a report using 'trace-cmd' 69 | if args.sched: 70 | log = get_log_name(args, "sched") 71 | dat = log + ".dat" 72 | sh_cmd = "trace-cmd report -i %s > %s" % (dat, log) 73 | p = subprocess.Popen(sh_cmd, shell=True) 74 | p.wait() 75 | 76 | def get_cmd_options(argv): 77 | parser = argparse.ArgumentParser( 78 | prog = "procmon", 79 | description = "Collect CPU statistics and system-wide scheduling statistics", 80 | epilog = "procmon internally uses 'trace-cmd', 'cpupower', and 'perf'.") 81 | parser.add_argument('-o', '--outdir', action='store', required=True, 82 | help='output directory') 83 | parser.add_argument('-l', '--log', action='store', required=True, 84 | help='log file prefix') 85 | 86 | 
parser.add_argument('-s', '--sched', action='store_true', 87 | help='trace wake-up events of process scheduler') 88 | parser.add_argument('-c', '--cstate', action='store_true', 89 | help='trace c-state of all CPUs') 90 | parser.add_argument('-e', '--energy', action='store_true', 91 | help='trace energy consumption of all CPUs') 92 | parser.add_argument('-p', '--perf', action='store_true', 93 | help='trace performance statistics of all CPUs') 94 | 95 | parser.add_argument('-a', '--all', action='store_true', 96 | help='trace all statistics') 97 | args = parser.parse_args(argv) 98 | 99 | # sanity check of arguments 100 | if args.all: 101 | (args.sched, args.cstate, args.energy, args.perf) = \ 102 | (True, True, True, True) 103 | nopts = (0 if args.sched == None else 1) + \ 104 | (0 if args.cstate == None else 1) + \ 105 | (0 if args.energy == None else 1) + \ 106 | (0 if args.perf == None else 1) 107 | if nopts == 0: 108 | parser.print_help() 109 | print("procmon: error: at least one out of '-s', '-c', `-e`, or '-p'" \ 110 | "should be specified", file = sys.stderr) 111 | exit(1) 112 | return args 113 | 114 | if __name__ == "__main__": 115 | args = get_cmd_options(sys.argv[1:]) 116 | run_procmons(args) 117 | wait_for_procmons(args) 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /bin/scmon: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | def pidof(pname): 11 | pids = [] 12 | for p in psutil.process_iter(): 13 | try: 14 | pinfo = p.as_dict(attrs=['pid', 'name']) 15 | if (pinfo['name'] == pname): 16 | pids.append( pinfo['pid'] ) 17 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 18 | pass 19 | return pids 20 | 21 | def pstree(pid): 22 | try: 23 | children = psutil.Process(pid).children(True) 24 | 
pids = map(lambda c: c.as_dict()['pid'], children) 25 | return list(pids) 26 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 27 | return [] 28 | 29 | def strace_pids(pids, odir, log): 30 | sh_cmd = "strace -o {log_path} -ff -p {pids}".format( 31 | log_path=os.path.join(odir, log), 32 | pids=",".join(str(pid) for pid in pids)) 33 | p = subprocess.Popen(sh_cmd, shell=True, stdout=None, stderr=None) 34 | p.wait() 35 | return p 36 | 37 | def strace_cmd(cmd, odir, log): 38 | sh_cmd = ["strace", "-o", os.path.join(odir, log), "-ff"] + cmd 39 | p = subprocess.Popen(sh_cmd) 40 | p.wait() 41 | return p 42 | 43 | def ignore_term_signals(): 44 | term_signals = (signal.SIGTERM, signal.SIGINT, signal.SIGABRT, 45 | signal.SIGBUS, signal.SIGILL, signal.SIGSEGV, 46 | signal.SIGHUP) 47 | for s in term_signals: 48 | # do nothing upon kill signals for graceful exit 49 | signal.signal(s, lambda signum, frame: None) 50 | 51 | def run_syscall_trace(args): 52 | # prep for gracefil termination 53 | ignore_term_signals() 54 | 55 | # prep for logging 56 | log = args.log + "-scmon" 57 | subprocess.Popen("mkdir -p " + args.outdir, 58 | shell=True, stdout=None, stderr=None).wait() 59 | outdir = args.outdir 60 | 61 | # strace with a command 62 | if args.cmd: 63 | strace_cmd(args.cmd, outdir, log) 64 | return 65 | 66 | # strace with process id(s) 67 | # -p: strace of pid 68 | if args.pid: 69 | pids = [args.pid] 70 | # -r: strace of pid and all its decendents 71 | elif args.root: 72 | pids = pstree(args.root) 73 | # -n: strace of a process with 'name' and all its decendents 74 | elif args.name: 75 | ps = pidof(args.name) 76 | if ps == []: 77 | print("scmon: error: %s does not exists" % args.name, 78 | file = sys.stderr) 79 | exit(1) 80 | pids = [] 81 | for p in ps: 82 | pids = pids + pstree(p) 83 | strace_pids(pids, outdir, log) 84 | 85 | def get_cmd_options(argv): 86 | parser = argparse.ArgumentParser( 87 | prog = "scmon", 88 | description = "Collect system call usage 
statistics of a program", 89 | epilog = "For example, 'scmon -o log -l steam -n steam' to log " \ 90 | "the system call usage of 'steam' and all its decendents " \ 91 | "under log/steam*-scmon*.") 92 | parser.add_argument('-o', '--outdir', action='store', required=True, 93 | help='output directory') 94 | parser.add_argument('-l', '--log', action='store', required=True, 95 | help='log file prefix' ) 96 | parser.add_argument('-p', '--pid', action='store', type=int, 97 | help='process id to monitor') 98 | parser.add_argument('-r', '--root', action='store', type=int, 99 | help='root process id to monitor ' \ 100 | '(all decendents will be monitored)') 101 | parser.add_argument('-n', '--name', action='store', 102 | help='name of a process to monitor') 103 | parser.add_argument('-c', '--cmd', action='store', nargs='+', 104 | help='command to execute') 105 | 106 | args = parser.parse_args(argv) 107 | 108 | # check if only one of -p, -r, or -c is specified 109 | nprogs = (0 if args.pid == None else 1) + \ 110 | (0 if args.root == None else 1) + \ 111 | (0 if args.name== None else 1) + \ 112 | (0 if args.cmd == None else 1) 113 | if nprogs != 1: 114 | parser.print_help() 115 | print("scmon: error: only one out of '-p', '-r', `-n`, or '-c'" \ 116 | "should be specified", file = sys.stderr) 117 | exit(1) 118 | return args 119 | 120 | if __name__ == "__main__": 121 | args = get_cmd_options(sys.argv[1:]) 122 | 123 | run_syscall_trace(args) 124 | 125 | -------------------------------------------------------------------------------- /bin/scinsight: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import zlib 8 | import matplotlib.pyplot as plt 9 | import argparse 10 | 11 | def get_syscall_name(line): 12 | at = line.find('(') 13 | syscall = line[0:at] 14 | if at == -1 or syscall.isalnum() == False: 15 | return None 16 | # further 
classify futex into futex wait and wake. 17 | if syscall == "futex": 18 | if line.find("FUTEX_WAIT") > 0: 19 | syscall = "futex::wait" 20 | elif line.find("FUTEX_WAKE") > 0: 21 | if line.find(") = 0") > 0: 22 | syscall = "futex::wake:non-zero" 23 | else: 24 | syscall = "futex::wake:zero" 25 | else: 26 | syscall = "futex::other" 27 | return syscall 28 | 29 | def get_strace_names(odir, log): 30 | prefix = os.path.join(odir, log) 31 | # 32 | # Match only prefix-scmon.* files. 33 | # 1. /path/to/prefix-scmon* 34 | # 35 | # Treat /path/to/prefix as a directory, and match any *-scmon.* files 36 | # inside of it. 37 | # 2. /path/to/prefix/*-scmon.* 38 | # 39 | logs = prefix + '-scmon.*' 40 | if not glob.glob(logs): 41 | logs = os.path.join(prefix, '*-scmon.*') 42 | return logs 43 | 44 | def syscall_stat(odir, log): 45 | stat = {} 46 | logs = get_strace_names(odir, log) 47 | for log in glob.glob(logs): 48 | with open(log, 'r') as f: 49 | for line in f: 50 | syscall = get_syscall_name(line) 51 | if syscall != None: 52 | stat[syscall] = stat.get(syscall, 0) + 1 53 | 54 | total = float(sum(stat.values())) 55 | scr_list = [(s, c, float(c)/total * 100.0) for s, c in stat.items()] 56 | scr_list.sort(key=lambda x: x[1], reverse=True) 57 | return scr_list 58 | 59 | def get_log_name(args, out): 60 | log = os.path.join(args.outdir, args.log) + "-scinsight-stat." 
+ out 61 | return log 62 | 63 | def hash_rgb_from_str(s): 64 | h = zlib.crc32( bytes(s, 'utf-8') ) 65 | r = (h & 0x00FF00) >> 8 66 | g = (h & 0xFF0000) >> 16 67 | b = (h & 0x0000FF) >> 0 68 | return (float(r)/0xFF, float(g)/0xFF, float(b)/0xFF) 69 | 70 | def reset_plot(): 71 | plt.clf() 72 | plt.style.use('default') 73 | plt.rcParams['font.size'] = 7 74 | 75 | def gen_pie_chart(args, scr_list): 76 | # unzip stat 77 | sys_list, cnt_list, ratio_list = list(zip(*scr_list)) 78 | # label for each syscall 79 | label_list = list( map(lambda t: "%s (%.2f%s)" % (t[0], t[1], "%"), 80 | zip(sys_list, ratio_list)) ) 81 | # assign a color per syscall 82 | rgb_list = list( map(lambda s: hash_rgb_from_str(s), sys_list) ) 83 | 84 | # clear canvas 85 | reset_plot() 86 | fig, ax = plt.subplots(figsize=(3.5,3)) 87 | ax.pie(cnt_list, labels=label_list, colors=rgb_list) 88 | fig_name = get_log_name(args, "svg") 89 | plt.savefig(fig_name) 90 | plt.close() 91 | 92 | def report_syscall_stat_in_csv(scr_list, f): 93 | print("%s, %s, %s" % ("{0:^20}".format("syscall"), 94 | "{0:^20}".format("count"), 95 | "{0:^20}".format("ratio (%)")), file = f) 96 | 97 | for s, c, r in scr_list: 98 | print("%s, %s, %s" % ("{0:<20}".format(s), 99 | "{0:>20}".format(c), 100 | "{0:>20}".format("%.4f" % r)), file = f) 101 | 102 | def report_syscall_stat(args): 103 | # collect stat 104 | scr_list = syscall_stat(args.outdir, args.log) 105 | # generate a pie chart in svg 106 | gen_pie_chart(args, scr_list) 107 | # report in csv 108 | with open(get_log_name(args, "csv"), "w") as f: 109 | report_syscall_stat_in_csv(scr_list, f) 110 | if args.quiet == False: 111 | report_syscall_stat_in_csv(scr_list, sys.stdout) 112 | 113 | def get_cmd_options(argv): 114 | parser = argparse.ArgumentParser( 115 | prog = "scinsight", 116 | description = "Report system call usage statistics of a program",) 117 | parser.add_argument('-o', '--outdir', action='store', required=True, 118 | help='output directory') 119 | 
parser.add_argument('-l', '--log', action='store', required=True, 120 | help='log file prefix, or path to directory containing log files') 121 | parser.add_argument('-q', '--quiet', action='store_true', 122 | help='do not print result to stdout' ) 123 | 124 | args = parser.parse_args(argv) 125 | return args 126 | 127 | if __name__ == "__main__": 128 | args = get_cmd_options(sys.argv[1:]) 129 | report_syscall_stat(args) 130 | 131 | 132 | -------------------------------------------------------------------------------- /config/MangoHud.conf: -------------------------------------------------------------------------------- 1 | ### MangoHud configuration file 2 | ### Uncomment any options you wish to enable. Default options are left uncommented 3 | ### Use some_parameter=0 to disable a parameter (only works with on/off parameters) 4 | ### Everything below can be used / overridden with the environment variable MANGOHUD_CONFIG instead 5 | 6 | ################ PERFORMANCE ################# 7 | 8 | ### Limit the application FPS. Comma-separated list of one or more FPS values (e.g. 0,30,60). 0 means unlimited (unless VSynced) 9 | # fps_limit= 10 | 11 | ### VSync [0-3] 0 = adaptive; 1 = off; 2 = mailbox; 3 = on 12 | # vsync= 13 | 14 | ### OpenGL VSync [0-N] 0 = off; >=1 = wait for N v-blanks, N > 1 acts as a FPS limiter (FPS = display refresh rate / N) 15 | # gl_vsync= 16 | 17 | ################### VISUAL ################### 18 | 19 | ### Legacy layout 20 | # legacy_layout=false 21 | 22 | ### Display custom centered text, useful for a header 23 | # custom_text_center= 24 | 25 | ### Display the current system time 26 | # time 27 | 28 | ### Time formatting examples 29 | # time_format=%H:%M 30 | # time_format=[ %T %F ] 31 | # time_format=%X # locally formatted time, because of limited glyph range, missing characters may show as '?' (e.g. 
Japanese) 32 | 33 | ### Display MangoHud version 34 | # version 35 | 36 | ### Display the current GPU information 37 | ## Note: gpu_mem_clock also needs "vram" to be enabled 38 | gpu_stats 39 | # gpu_temp 40 | # gpu_core_clock 41 | # gpu_mem_clock 42 | # gpu_power 43 | # gpu_text=GPU 44 | # gpu_load_change 45 | # gpu_load_value=60,90 46 | # gpu_load_color=39F900,FDFD09,B22222 47 | 48 | ### Display the current CPU information 49 | cpu_stats 50 | # cpu_temp 51 | # cpu_power 52 | # cpu_text=CPU 53 | # cpu_mhz 54 | # cpu_load_change 55 | # cpu_load_value=60,90 56 | # cpu_load_color=39F900,FDFD09,B22222 57 | 58 | ### Display the current CPU load & frequency for each core 59 | # core_load 60 | # core_load_change 61 | 62 | ### Display IO read and write for the app (not system) 63 | # io_stats 64 | # io_read 65 | # io_write 66 | 67 | ### Display system vram / ram / swap space usage 68 | # vram 69 | ram 70 | swap 71 | 72 | ### Display per process memory usage 73 | ## Show resident memory and other types, if enabled 74 | # procmem 75 | # procmem_shared 76 | # procmem_virt 77 | 78 | ### Display battery information 79 | # battery 80 | # battery_icon 81 | # gamepad_battery 82 | # gamepad_battery_icon 83 | 84 | ### Display FPS and frametime 85 | fps 86 | # fps_sampling_period=500 87 | # fps_color_change 88 | # fps_value=30,60 89 | # fps_color=B22222,FDFD09,39F900 90 | frametime 91 | # frame_count 92 | 93 | ### Display miscellaneous information 94 | # engine_version 95 | # gpu_name 96 | # vulkan_driver 97 | # wine 98 | 99 | ### Display loaded MangoHud architecture 100 | # arch 101 | 102 | ### Display the frametime line graph 103 | frame_timing 104 | # histogram 105 | 106 | ### Display GameMode / vkBasalt running status 107 | # gamemode 108 | # vkbasalt 109 | 110 | ### Display current FPS limit 111 | # show_fps_limit 112 | 113 | ### Display the current resolution 114 | # resolution 115 | 116 | ### Display custom text 117 | # custom_text= 118 | ### Display output of Bash command in 
next column 119 | # exec= 120 | 121 | ### Display media player metadata 122 | # media_player 123 | # media_player_name=spotify 124 | ## Format metadata, lines are delimited by ; (wip) 125 | # media_player_format={title};{artist};{album} 126 | # media_player_format=Track:;{title};By:;{artist};From:;{album} 127 | 128 | ### Change the hud font size 129 | # font_size=24 130 | # font_scale=1.0 131 | # font_size_text=24 132 | # font_scale_media_player=0.55 133 | # no_small_font 134 | 135 | ### Change default font (set location to TTF/OTF file) 136 | ## Set font for the whole hud 137 | # font_file= 138 | 139 | ## Set font only for text like media player metadata 140 | # font_file_text= 141 | 142 | ## Set font glyph ranges. Defaults to Latin-only. Don't forget to set font_file/font_file_text to font that supports these 143 | ## Probably don't enable all at once because of memory usage and hardware limits concerns 144 | ## If you experience crashes or text is just squares, reduce glyph range or reduce font size 145 | # font_glyph_ranges=korean,chinese,chinese_simplified,japanese,cyrillic,thai,vietnamese,latin_ext_a,latin_ext_b 146 | 147 | ### Change the hud position 148 | # position=top-left 149 | 150 | ### Change the corner roundness 151 | # round_corners= 152 | 153 | ### Disable / hide the hud by default 154 | # no_display 155 | 156 | ### Hud position offset 157 | # offset_x= 158 | # offset_y= 159 | 160 | ### Hud dimensions 161 | # width= 162 | # height= 163 | # table_columns= 164 | # cellpadding_y= 165 | 166 | ### Hud transparency / alpha 167 | # background_alpha=0.5 168 | # alpha= 169 | 170 | ### FCAT overlay 171 | ### This enables an FCAT overlay to perform frametime analysis on the final image stream. 172 | ### Enable the overlay 173 | # fcat 174 | ### Set the width of the FCAT overlay. 175 | ### 24 is a performance optimization on AMD GPUs that should not have adverse effects on nVidia GPUs. 176 | ### A minimum of 20 pixels is recommended by nVidia. 
177 | # fcat_overlay_width=24 178 | ### Set the screen edge, this can be useful for special displays that don't update from top edge to bottom. This goes from 0 (left side) to 3 (top edge), counter-clockwise. 179 | # fcat_screen_edge=0 180 | 181 | ### Color customization 182 | # text_color=FFFFFF 183 | # gpu_color=2E9762 184 | # cpu_color=2E97CB 185 | # vram_color=AD64C1 186 | # ram_color=C26693 187 | # engine_color=EB5B5B 188 | # io_color=A491D3 189 | # frametime_color=00FF00 190 | # background_color=020202 191 | # media_player_color=FFFFFF 192 | # wine_color=EB5B5B 193 | # battery_color=FF9078 194 | 195 | ### Specify GPU with PCI bus ID for AMDGPU and NVML stats 196 | ### Set to 'domain:bus:slot.function' 197 | # pci_dev=0:0a:0.0 198 | 199 | ### Blacklist 200 | # blacklist= 201 | 202 | ### Control over socket 203 | ### Enable and set socket name, '%p' is replaced with process id 204 | # control = mangohud 205 | # control = mangohud-%p 206 | 207 | ################ WORKAROUNDS ################# 208 | ### Options starting with "gl_*" are for OpenGL 209 | ### Specify what to use for getting display size. Options are "viewport", "scissorbox" or disabled. Defaults to using glXQueryDrawable 210 | # gl_size_query=viewport 211 | 212 | ### (Re)bind given framebuffer before MangoHud gets drawn. Helps with Crusader Kings III 213 | # gl_bind_framebuffer=0 214 | 215 | ### Don't swap origin if using GL_UPPER_LEFT. 
Helps with Ryujinx 216 | # gl_dont_flip=1 217 | 218 | ################ INTERACTION ################# 219 | 220 | ### Change toggle keybinds for the hud & logging 221 | # toggle_hud=Shift_R+F12 222 | # toggle_fps_limit=Shift_L+F1 223 | # toggle_logging=Shift_L+F2 224 | # reload_cfg=Shift_L+F4 225 | # upload_log=Shift_L+F3 226 | 227 | #################### LOG ##################### 228 | ### Automatically start the log after X seconds 229 | # autostart_log=1 230 | ### Set amount of time in seconds that the logging will run for 231 | # log_duration= 232 | ### Change the default log interval, 100 is default 233 | log_interval=0 234 | ### Set location of the output files (required for logging) 235 | output_folder=/home/deck/mangologs-vapormark 236 | ### Permit uploading logs directly to FlightlessMango.com 237 | # permit_upload=1 238 | ### Define a '+'-separated list of percentiles shown in the benchmark results 239 | ### Use "AVG" to get a mean average. Default percentiles are 97+AVG+1+0.1 240 | # benchmark_percentiles=97,AVG,1,0.1 241 | -------------------------------------------------------------------------------- /bin/mbench: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | cur_dir = os.path.dirname(__file__) 11 | running_tasks = [] 12 | 13 | class color: 14 | HEADER = '\033[95m' 15 | BLUE = '\033[94m' 16 | GREEN = '\033[92m' 17 | WARNING = '\033[93m' 18 | FAIL = '\033[91m' 19 | ENDC = '\033[0m' 20 | BOLD = '\033[1m' 21 | UNDERLINE = '\033[4m' 22 | 23 | def print_log(msg): 24 | print(color.BLUE + "# [mbench] " + msg + color.ENDC) 25 | 26 | def print_warning(msg): 27 | print(color.FAIL + "# [mbench] " + msg + color.ENDC) 28 | 29 | def ignore_term_signals(): 30 | term_signals = (signal.SIGTERM, signal.SIGINT, signal.SIGABRT, 31 | signal.SIGBUS, signal.SIGILL, signal.SIGSEGV, 32 | 
signal.SIGHUP) 33 | for s in term_signals: 34 | # do nothing upon kill signals for graceful exit 35 | signal.signal(s, lambda signum, frame: None) 36 | 37 | def kill_all(parent): 38 | # NOTE 39 | # - SIGINT should be sent since some programs (e.g., trace-cmd) terminate 40 | # gracefully only for SIGINT (Ctrl-C) 41 | 42 | # kill all the process tree of parent 43 | children = psutil.Process(parent.pid).children(True) 44 | for c in children: 45 | try: 46 | c.send_signal(signal.SIGINT) 47 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 48 | pass 49 | try: 50 | parent.send_signal(signal.SIGINT) 51 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 52 | pass 53 | # block until all terminated 54 | try: 55 | psutil.wait_procs(children) 56 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 57 | pass 58 | 59 | def run_bench(args): 60 | # check performcne interference 61 | check_interference(args) 62 | # prep for gracefil termination 63 | ignore_term_signals() 64 | 65 | # prep for logging 66 | subprocess.Popen("mkdir -p " + args.outdir, 67 | shell=True, stdout=None, stderr=None).wait() 68 | # prep command lines 69 | sh_cmds = [] 70 | # 1) background task 71 | if args.bg != None: 72 | sh_cmds.append(args.bg) 73 | # 2) performance monitor 74 | if args.procmon: 75 | global cur_dir 76 | cmd = os.path.join(cur_dir, "procmon") 77 | arg = " -o %s -l %s -a" % (args.outdir, args.log) 78 | sh_cmds.append(cmd + arg) 79 | # 3) benchmark itself 80 | log_fil = os.path.join(args.outdir, args.log + ".schbench_out") 81 | log_out = " 2>&1 | tee " + log_fil 82 | sh_cmds.append(args.fg + log_out) 83 | 84 | # run commands 85 | for sh_cmd in sh_cmds: 86 | print_log(sh_cmd) 87 | p = subprocess.Popen(sh_cmd, shell=True) 88 | # prepend a task to the list 89 | running_tasks.insert(0, p) 90 | 91 | def wait_for_tasks(args): 92 | # wait for the running benchmarks 93 | p = running_tasks[0] 94 | p.wait() 95 | 96 | # kill all 
others 97 | for p in running_tasks[1:]: 98 | kill_all(p) 99 | 100 | def ps_exist(pname): 101 | for p in psutil.process_iter(): 102 | try: 103 | pinfo = p.as_dict(attrs=['pid', 'name']) 104 | if (pinfo['name'] == pname): 105 | return True 106 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 107 | pass 108 | return False 109 | 110 | def check_interference(args): 111 | ret = False 112 | 113 | # check option 114 | if args.procmon: 115 | print_warning("-p will affect the benchmarking accuracy.") 116 | ret = True 117 | 118 | # heavy steam os tasks 119 | heavy_bg_tasks = ["steam", "mangoapp", "gamemoded", 120 | "gamescope", "steamwebhelper"] 121 | for t in heavy_bg_tasks: 122 | if ps_exist(t): 123 | print_warning("A heavy background task, %s, is detected. " \ 124 | "It will interfere the accuracy of benchmark." % t) 125 | ret = True 126 | return ret 127 | 128 | 129 | def expand_args_config(args): 130 | # sanity check 131 | if args.fg != None or args.bg != None: 132 | parser.print_help() 133 | print("bench: error: `-c` and `-f/-b` are disjoint." 
\ 134 | "Only one should be specified.", file = sys.stderr) 135 | exit(1) 136 | 137 | global cur_dir 138 | if args.config == "schbench50": 139 | args.bg = None 140 | args.fg = os.path.join(cur_dir, "schbench") + " -F128 -n10 -r%d -A50" % (args.runtime) 141 | elif args.config == "schbench100": 142 | args.bg = None 143 | args.fg = os.path.join(cur_dir, "schbench") + " -F128 -n10 -r%d" % (args.runtime) 144 | elif args.config == "schbench200": 145 | args.bg = os.path.join(cur_dir, "schbench") + " -F256 -n10 -r%d 2>&1 > /dev/null" % (args.runtime * 2) 146 | args.fg = os.path.join(cur_dir, "schbench") + " -F128 -n10 -r%d" % (args.runtime) 147 | else: 148 | parser.print_help() 149 | print("bench: error: unknown configuration: %s" % args.config, 150 | file = sys.stderr) 151 | exit(1) 152 | 153 | def get_cmd_options(argv): 154 | parser = argparse.ArgumentParser( 155 | prog = "mbench", 156 | description = "Run a micro-benchmark with a pre-configured setting", 157 | epilog = color.WARNING + 158 | """ 159 | Performance monitoring (-p) WILL interfere the results of micro-benchmark. Do NOT use -p when you collect performance results. Instead, run the same benchmark twice: one without profiling for performance comparison and another with profiling for analysis. Also, make sure there is no heavy background task running. 
160 | """ + color.ENDC) 161 | parser.add_argument('-o', '--outdir', action='store', required=True, 162 | help='output directory') 163 | parser.add_argument('-l', '--log', action='store', required=True, 164 | help='log file prefix') 165 | parser.add_argument('-b', '--bg', action='store', 166 | help='command line of a background task') 167 | parser.add_argument('-f', '--fg', action='store', 168 | help='command line of a foreground task for benchmarking') 169 | parser.add_argument('-c', '--config', action='store', 170 | help='run a benchmark with preconfigured setting: `schbench50`, `schbench100`, and `schbench200`, each of which runs `schbench` with 50%%, 100%%, and 200%% CPU utilization, respectively') 171 | parser.add_argument('-r', '--runtime', action='store', type=int, default=180, 172 | help='benchmark running time in seconds (default = 180sec)') 173 | parser.add_argument('-p', '--procmon', action='store_true', 174 | help='run with profiling on') 175 | args = parser.parse_args(argv) 176 | 177 | # expand args.config 178 | if args.config != None: 179 | expand_args_config(args) 180 | 181 | # sanity check 182 | if args.fg == None: 183 | parser.print_help() 184 | print("bench: error: either `-f` or `-c` should be specified.", 185 | file = sys.stderr) 186 | exit(1) 187 | 188 | return args 189 | 190 | if __name__ == "__main__": 191 | args = get_cmd_options(sys.argv[1:]) 192 | run_bench(args) 193 | wait_for_tasks(args) 194 | 195 | -------------------------------------------------------------------------------- /bin/energyinsight: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | def get_log_name(args): 11 | log = os.path.join(args.outdir, args.log + "-energyprof__.log") 12 | return log 13 | 14 | def get_csv_name(args): 15 | log = os.path.join(args.outdir, args.log + 
"-energyinsight__.csv") 16 | return log 17 | 18 | def get_nr_cpus(s): 19 | nr = 0 20 | toks = s.split(',') 21 | for tok in toks: 22 | cpus = tok.split('-') 23 | if len(cpus) == 2: 24 | nr += int(cpus[1]) - int(cpus[0]) + 1 25 | else: 26 | nr += 1 27 | return str(nr) 28 | 29 | def parse_config(f): 30 | ''' 31 | ## cpu=0, load=1, util=16 32 | ''' 33 | (cpu, nr_cpu, load, util, per_cpu_util) = (None, None, None, None, None) 34 | pos = 0 35 | while True: 36 | pos = f.tell() 37 | line = f.readline() 38 | if line == "": 39 | return (None, None, None, None, None) 40 | toks = line.split() 41 | if len(toks) == 0: 42 | continue 43 | if toks[0] != "##": 44 | continue 45 | cpu = toks[1].split("=")[1][:-1] 46 | nr_cpus = get_nr_cpus(cpu) 47 | load = toks[2].split("=")[1][:-1] 48 | util = toks[3].split("=")[1] 49 | per_cpu_util = str(float(util) / int(nr_cpus)) 50 | break 51 | f.seek(pos) 52 | return (cpu, nr_cpus, load, util, per_cpu_util) 53 | 54 | def get_data_in_2_toks(key, index_toks, data_toks): 55 | idx = next((i for i, x in enumerate(index_toks) if x == key), None) 56 | if idx == None: 57 | return None 58 | return data_toks[idx] 59 | 60 | def do_parse_turbostat(f): 61 | ''' 62 | Avg_MHz Busy% Bzy_MHz TSC_MHz IPC IRQ POLL C1 C2 C3 POLL% C1% C2% C3% CorWatt PkgWatt 63 | 506 18.83 2688 3294 1.52 1156 4 15 51 682 0.00 0.26 0.49 80.94 0.51 2.24 64 | ''' 65 | (avg_mhz, bzy_mhz, ipc, corwatt, pkgwatt) = (None, None, None, None, None) 66 | pos = 0 67 | while True: 68 | # index line 69 | pos = f.tell() 70 | line = f.readline() 71 | index_toks= line.split() 72 | if len(index_toks) == 0: 73 | continue 74 | if index_toks[0] != "Avg_MHz": 75 | if index_toks[0] == "stress-ng:" and index_toks[1] == "metrc:": 76 | break 77 | continue 78 | 79 | # data line 80 | pos = f.tell() 81 | line = f.readline() 82 | data_toks = line.split() 83 | if len(data_toks) == 0: 84 | break 85 | if not data_toks[0][0].isnumeric(): 86 | break 87 | 88 | avg_mhz = get_data_in_2_toks("Avg_MHz", index_toks, 
data_toks) 89 | bzy_mhz = get_data_in_2_toks("Bzy_MHz", index_toks, data_toks) 90 | ipc = get_data_in_2_toks("IPC", index_toks, data_toks) 91 | corwatt = get_data_in_2_toks("CorWatt", index_toks, data_toks) 92 | pkgwatt = get_data_in_2_toks("PkgWatt", index_toks, data_toks) 93 | 94 | break 95 | f.seek(pos) 96 | return (avg_mhz, bzy_mhz, ipc, corwatt, pkgwatt) 97 | 98 | def parse_turbostat(f): 99 | (avg_mhz, bzy_mhz, ipc, corwatt, pkgwatt) = (0.0, 0.0, 0.0, 0.0, 0.0) 100 | x = 0 101 | # calculate average of turbostat results for one configuration 102 | while True: 103 | (a, b, i, c, p) = do_parse_turbostat(f) 104 | if a == None: 105 | break 106 | avg_mhz += float(a) 107 | bzy_mhz += float(b) 108 | ipc += float(i) 109 | corwatt += float(c) 110 | pkgwatt += float(p) 111 | x += 1 112 | return (str(avg_mhz/x), str(bzy_mhz/x), str(ipc/x), str(corwatt/x), str(pkgwatt/x)) 113 | 114 | def parse_stress_ng(f): 115 | ''' 116 | stress-ng: metrc: [3044] stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s CPU used per RSS Max 117 | stress-ng: metrc: [3044] (secs) (secs) (secs) (real time) (usr+sys time) instance (%) (KB) 118 | stress-ng: metrc: [3044] cpu 23488 120.00 19.20 0.36 195.73 1200.76 1.02 7408 119 | ''' 120 | bogo_ops = None 121 | pos = 0 122 | while True: 123 | # index line 124 | pos = f.tell() 125 | line = f.readline() 126 | index_toks= line.split() 127 | if len(index_toks) == 0: 128 | continue 129 | if index_toks[0] != "stress-ng:" or index_toks[1] != "metrc:": 130 | continue 131 | line = f.readline() 132 | 133 | # data line 134 | pos = f.tell() 135 | line = f.readline() 136 | data_toks = line.split() 137 | if len(data_toks) != 12: 138 | break 139 | if not data_toks[8][0].isnumeric(): 140 | break 141 | bogo_ops = data_toks[8] 142 | break 143 | f.seek(pos) 144 | return (bogo_ops, ) 145 | 146 | def parse_perf_power(f): 147 | ''' 148 | Performance counter stats for 'system wide': 149 | S0 1 285.74 Joules power/energy-pkg/ 150 | ''' 151 | joules = None 
152 | pos = 0 153 | while True: 154 | # first line 155 | pos = f.tell() 156 | line = f.readline() 157 | toks= line.split() 158 | if len(toks) == 0: 159 | continue 160 | if toks[0] != "Performance" or toks[1] != "counter" or toks[2] != "stats": 161 | continue 162 | line = f.readline() 163 | 164 | # second line 165 | pos = f.tell() 166 | line = f.readline() 167 | toks = line.split() 168 | if len(toks) != 5: 169 | break 170 | if not toks[2][0].isnumeric(): 171 | break 172 | joules = toks[2].replace(",", "") 173 | line = f.readline() 174 | 175 | # third line 176 | line = f.readline() 177 | break 178 | f.seek(pos) 179 | return (joules, ) 180 | 181 | def gen_data_in_csv(args, data): 182 | csv = get_csv_name(args) 183 | with open(csv, "w") as f: 184 | for row in data: 185 | row_str = "| ".join(row) 186 | print("| " + row_str + " |", file = f) 187 | 188 | def report_energyinsight_in_csv(args): 189 | log = get_log_name(args) 190 | data = [("cpu", "nr_cpus", "load", "util", "per_cpu_util", 191 | "avg_mhz", "bzy_mhz", "ipc", "corwatt", "pkgwatt", 192 | "bogo_ops", 193 | "joules", 194 | "ops/joule")] 195 | 196 | with open(log, 'r') as f: 197 | while True: 198 | c = parse_config(f) 199 | if c[0]== None: 200 | break 201 | 202 | t = parse_turbostat(f) 203 | if t[0] == None: 204 | break 205 | 206 | s = parse_stress_ng(f) 207 | if s[0] == None: 208 | break 209 | 210 | j = parse_perf_power(f) 211 | if j[0] == None: 212 | break 213 | 214 | o = (str(float(s[0]) / float(j[0])), ) 215 | 216 | row = c + t + s + j + o 217 | data.append(row) 218 | 219 | gen_data_in_csv(args, data) 220 | # TODO 221 | # - generate bar graphs per CPU utilization 222 | # - x-axis: per-cpu utilization 223 | # - x-tics: cpu config 224 | # - y1-axis: ops/joule 225 | # - y2-axis: ops 226 | 227 | def get_cmd_options(argv): 228 | 229 | parser = argparse.ArgumentParser( 230 | prog = "energyinsight", 231 | description = "Report energy usage per CPU load and number of online CPUs",) 232 | parser.add_argument('-o', 
'--outdir', action='store', required=True, 233 | help='output directory') 234 | parser.add_argument('-l', '--log', action='store', required=True, 235 | help='output log file prefix') 236 | parser.add_argument('-q', '--quiet', action='store_true', 237 | help='do not print result to stdout' ) 238 | args = parser.parse_args(argv) 239 | return args 240 | 241 | if __name__ == "__main__": 242 | args = get_cmd_options(sys.argv[1:]) 243 | report_energyinsight_in_csv(args) 244 | 245 | 246 | 247 | -------------------------------------------------------------------------------- /bin/ginsight: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import argparse 5 | import csv 6 | import matplotlib.pyplot as plt 7 | import zlib 8 | 9 | class time_serise: 10 | time_serise = [] 11 | cdf = [] 12 | num = 0 13 | 14 | def __init__(self, ts): 15 | self.time_serise = ts 16 | self.cdf = ts.copy() 17 | self.cdf.sort() 18 | self.num = len(ts) 19 | 20 | def get_min(self): 21 | return self.get_percentile(0.0) 22 | 23 | def get_nmax(self): 24 | return self.get_percentile(100.0) 25 | 26 | def get_median(self): 27 | return self.get_percentile(50.0) 28 | 29 | def get_percentile(self, p): 30 | # p = [0:100] 31 | i = round( float(self.num - 1) * (p / 100.0) ) 32 | return self.cdf[i] 33 | 34 | def get_average(self): 35 | return sum(self.cdf) / self.num 36 | 37 | def load_mango_csv(csv_name): 38 | def get_sys_info(csv_rd): 39 | sys_info = {} 40 | keys = rd.__next__() 41 | values = rd.__next__() 42 | for (k, v) in zip(keys, values): 43 | sys_info[k] = v 44 | return sys_info 45 | 46 | with open(csv_name, 'r') as f: 47 | rd = csv.reader(f) 48 | try: 49 | # parse system information 50 | sys_info = get_sys_info(rd) 51 | # transpose a row-oriented format to column-oriented format 52 | cols = list( map(lambda c: [c], rd.__next__()) ) 53 | for row in rd: 54 | for (i, elm) in enumerate(row): 55 | cols[i].append( 
float(elm) ) 56 | except csv.Error as e: 57 | sys.exit('Invalid CSV data at {}@{}: {}'.format( \ 58 | csv_name, rd.line_num, e)) 59 | perf_data = {} 60 | for cl in cols: 61 | k, v = cl[0], time_serise( cl[1:] ) 62 | perf_data[k] = v 63 | return (sys_info, perf_data) 64 | 65 | def get_log_name(args, fea, out): 66 | log = os.path.join(args.outdir, 67 | args.prefix + "-ginsight-" + fea + "." + out) 68 | return log 69 | 70 | def reset_plot(): 71 | plt.clf() 72 | plt.style.use('default') 73 | plt.rcParams['font.size'] = 7 74 | 75 | def hash_rgb_from_str(s): 76 | h = zlib.crc32( bytes(s, 'utf-8') ) 77 | g = (h & 0xFF0000) >> 16 78 | b = (h & 0x0000FF) >> 0 79 | r = (h & 0x00FF00) >> 8 80 | return (float(r)/0xFF, float(g)/0xFF, float(b)/0xFF) 81 | 82 | def gen_dist_fig(args, fea, ts, pss, title, x_label, y_label, min_max): 83 | # prepare canvas 84 | reset_plot() 85 | fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(3.5, 10)) 86 | 87 | # 1) violin plot 88 | # - convert pss to quantile for violin plot 89 | quan = [] 90 | for ps in pss: 91 | p, k = ps 92 | quan.append( p / 100.0 ) 93 | # - plot 94 | ax = axs[0] 95 | violin = ax.violinplot([ts.time_serise], showmeans=True, quantiles=[ quan ]) 96 | violin['bodies'][0].set_facecolor( hash_rgb_from_str(y_label) ) 97 | violin['cmeans'].set_edgecolor('red') 98 | # - set title 99 | ax.set_title(title) 100 | # - decoration 101 | ax.set_ylim(bottom = min_max[0], top = min_max[1]) 102 | ax.set_xlabel(x_label) 103 | ax.set_ylabel(y_label) 104 | 105 | # 2) cdf plot 106 | # - plot 107 | ax = axs[1] 108 | ax.plot(range(ts.num), ts.cdf, label='CDF', 109 | linewidth=1, color='black', marker='*') 110 | # - plot stats 111 | y = ts.get_average() 112 | ax.plot([0, ts.num], [y, y], label="Average: %.1f" % y) 113 | for p, l in pss: 114 | y = ts.get_percentile(p) 115 | ax.plot([0, ts.num], [y, y], label=l + ": %.1f" % y) 116 | # - decoration 117 | ax.set_ylim(bottom = min_max[0], top = min_max[1]) 118 | ax.set_xlabel(x_label) 119 | 
ax.set_ylabel(y_label) 120 | ax.legend() 121 | 122 | # 3) time serise plot 123 | ax = axs[2] 124 | ax.plot(range(ts.num), ts.time_serise, 125 | label='FPS', linewidth=1, color='black') 126 | # - decoration 127 | ax.set_ylim(bottom = min_max[0], top = min_max[1]) 128 | ax.set_xlabel(x_label) 129 | ax.set_ylabel(y_label) 130 | 131 | # save to the file 132 | fig_name = get_log_name(args, fea, "svg") 133 | plt.savefig(fig_name) 134 | plt.close() 135 | 136 | def gen_ts_fig(args, fea, ts, pss, title, x_label, y_label, min_max): 137 | # clear canvas 138 | reset_plot() 139 | plt.figure(figsize=(3.5,3)) 140 | 141 | # plot FPS overtime 142 | plt.plot(range(ts.num), ts.time_serise, label='FPS', linewidth=1, color='black') 143 | 144 | # decoration 145 | plt.ylim(bottom = min_max[0], top = min_max[1]) 146 | plt.xlabel(x_label) 147 | plt.ylabel(y_label) 148 | plt.title(title) 149 | 150 | # save to the file 151 | fig_name = get_log_name(args, fea + "-ts", "svg") 152 | plt.savefig(fig_name) 153 | plt.close() 154 | 155 | def gen_violin_fig(args, fea, ts, pss, title, x_label, y_label, min_max): 156 | # clear canvas 157 | reset_plot() 158 | plt.figure(figsize=(3.5,3)) 159 | 160 | # pss to quantile 161 | quan = [] 162 | for ps in pss: 163 | p, k = ps 164 | quan.append( p / 100.0 ) 165 | 166 | # plot 167 | violin = plt.violinplot([ts], showmeans=True, quantiles=[ quan ]) 168 | violin['bodies'][0].set_facecolor( hash_rgb_from_str(y_label) ) 169 | violin['cmeans'].set_edgecolor('red') 170 | 171 | # decoration 172 | plt.ylim(bottom = min_max[0], top = min_max[1]) 173 | plt.xlabel(x_label) 174 | plt.ylabel(y_label) 175 | plt.title(title) 176 | 177 | # save to the file 178 | fig_name = get_log_name(args, fea + "-violin", "svg") 179 | plt.savefig(fig_name) 180 | plt.close() 181 | 182 | def gen_cdf_fig(args, fea, ts, pss, title, x_label, y_label, min_max): 183 | # clear canvas 184 | reset_plot() 185 | plt.figure(figsize=(3.5,3)) 186 | 187 | # plot cdf 188 | plt.plot(range(ts.num), ts.cdf, 
label='CDF', 189 | linewidth=1, color='black', marker='*') 190 | # plot stats 191 | y = ts.get_average() 192 | plt.plot([0, ts.num], [y, y], label="Average: %.1f" % y) 193 | for p, l in pss: 194 | y = ts.get_percentile(p) 195 | plt.plot([0, ts.num], [y, y], label=l + ": %.1f" % y) 196 | 197 | # decoration 198 | plt.ylim(bottom = min_max[0], top = min_max[1]) 199 | plt.xlabel(x_label) 200 | plt.ylabel(y_label) 201 | plt.title(title) 202 | plt.legend() 203 | 204 | # save to the file 205 | fig_name = get_log_name(args, fea + "-cdf", "svg") 206 | plt.savefig(fig_name) 207 | plt.close() 208 | 209 | def gen_cdf_csv(args, ts, pss, y_label, f): 210 | # header 211 | print("%s, %s" % ("{0:^20}".format("Stat"), "{0:^20}".format(y_label)), 212 | file = f ) 213 | 214 | # stat 215 | y = ts.get_average() 216 | print("%s, %s" % ("{0:<20}".format("Average"), "{0:>20}".format("%.4f" % y)), 217 | file = f) 218 | for p, l in pss: 219 | y = ts.get_percentile(p) 220 | print("%s, %s" % ("{0:<20}".format(l), "{0:>20}".format("%.4f" % y)), 221 | file = f) 222 | low1 = ts.get_percentile(1.0) 223 | med = ts.get_percentile(50.0) 224 | y = low1 / med 225 | print("%s, %s" % ("{0:<20}".format("Low1-Med ratio"), 226 | "{0:>20}".format("%.4f" % y)), 227 | file = f) 228 | 229 | def report_stat(args, sys_info, perf_data): 230 | class report_conf: 231 | key = "" 232 | pss = [] 233 | title = "" 234 | x_label = "" 235 | y_label = "" 236 | min_max = (0.0, 0.0) 237 | def __init__(self, k, p, t, x, y, mX): 238 | self.key = k 239 | self.pss = p 240 | self.title = t 241 | self.x_label = x 242 | self.y_label = y 243 | self.min_max = mX 244 | 245 | # FIXME: ad-hoc code 246 | y_fps= 1.0 247 | if args.prefix == "troy-low-battle-benchmark": 248 | y_fps= 2.0 249 | title = args.prefix 250 | confs = [report_conf("fps", 251 | [(50.0, "Median"), (0.0, "Min"), (100.0, "Max"), 252 | (0.1, "Low 0.1%"), (1.0, "Low 1%"), (97.0, "Low 97%"),], 253 | title, "frames", "FPS", (0.0, 120.0 * y_fps)), 254 | 
report_conf("frametime", 255 | [(50.0, "Median"), (0.0, "Min"), (100.0, "Max"), 256 | (99.0, "High 1%"), (99.9, "High 0.1%"), ], 257 | title, "frames", "frametime (usec)", (0.0, 200000.0)), 258 | report_conf("cpu_load", 259 | [(50.0, "Median"), (0.0, "Min"), (100.0, "Max")], 260 | title, "", "cpu load (%)", (0.0, 100.0)), 261 | report_conf("gpu_load", 262 | [(50.0, "Median"), (0.0, "Min"), (100.0, "Max")], 263 | title, "", "gpu load (%)", (0.0, 100.0)), 264 | report_conf("ram_used", 265 | [(50.0, "Median"), (0.0, "Min"), (100.0, "Max")], 266 | title, "", "ram used (GB)", (0.0, 16.0)),] 267 | 268 | for c in confs: 269 | ts = perf_data[c.key] 270 | # generate cdf stat 271 | log = get_log_name(args, c.key, "csv") 272 | with open(log, 'w') as f: 273 | gen_cdf_csv(args, ts, c.pss, c.y_label, f) 274 | if args.quiet == False: 275 | gen_cdf_csv(args, ts, c.pss, c.y_label, sys.stdout) 276 | # generate distribution graphs 277 | gen_violin_fig(args, c.key, ts.time_serise, c.pss, c.title, c.x_label, c.y_label, c.min_max) 278 | gen_cdf_fig(args, c.key, ts, c.pss, c.title, c.x_label, c.y_label, c.min_max) 279 | gen_ts_fig(args, c.key, ts, c.pss, c.title, c.x_label, c.y_label, c.min_max) 280 | gen_dist_fig(args, c.key, ts, c.pss, c.title, c.x_label, c.y_label, c.min_max) 281 | 282 | def get_cmd_options(argv): 283 | parser = argparse.ArgumentParser( 284 | prog = "ginsight", 285 | description = "Generarte a report from MangoHud log") 286 | parser.add_argument('-l', '--log', action='store', required=True, 287 | help='MangoHud log file in a CSV format') 288 | parser.add_argument('-o', '--outdir', action='store', required=True, 289 | help='output directory') 290 | parser.add_argument('-p', '--prefix', action='store', required=True, 291 | help='output file prefix') 292 | parser.add_argument('-q', '--quiet', action='store_true', 293 | help='do not print result to stdout' ) 294 | args = parser.parse_args(argv) 295 | return args 296 | 297 | if __name__ == "__main__": 298 | args = 
get_cmd_options(sys.argv[1:]) 299 | sys_info, perf_data = load_mango_csv(args.log) 300 | report_stat(args, sys_info, perf_data) 301 | 302 | 303 | 304 | -------------------------------------------------------------------------------- /bin/procinsight: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import signal 6 | import glob 7 | import argparse 8 | import psutil 9 | 10 | MAX_NCPUS = 4096 11 | 12 | def get_log_name(args, mon): 13 | log = os.path.join(args.outdir, 14 | args.log + "-procmon-" + mon + "__.log") 15 | return log 16 | 17 | def get_csv_name(args, mon, scope): 18 | csv = os.path.join(args.outdir, 19 | args.log + "-procinsight-" + mon + "-" + scope + ".csv") 20 | return csv 21 | 22 | def transpose_2d_list(ll): 23 | return list(zip(*ll)) 24 | 25 | def report_stat_in_csv(col_names, stat_ll, f): 26 | def get_sep(c, ncol): 27 | if c == (ncol - 1): 28 | return "\n" 29 | else: 30 | return ", " 31 | 32 | def is_float(n): 33 | return int(n) != n 34 | 35 | # print column header 36 | ncol = len(stat_ll[0]) + 1 37 | col_names = col_names[0:ncol] 38 | for c, col in enumerate(col_names): 39 | print("{0:^20}".format(col), end=get_sep(c, ncol), file = f) 40 | 41 | # print tuple by tuple 42 | ncol = len(stat_ll[0]) 43 | nrow = len(stat_ll) 44 | for r in range(nrow): 45 | key, _= stat_ll[r][0] 46 | print("{0:<20}".format(key), end=", ", file = f) 47 | for c in range(ncol): 48 | _, val= stat_ll[r][c] 49 | if val is None: 50 | print("{0:>20}".format("%s" % "N/A"), end=get_sep(c, ncol), file = f) 51 | elif is_float(val): 52 | print("{0:>20}".format("%.4f" % val), end=get_sep(c, ncol), file = f) 53 | else: 54 | print("{0:>20}".format(int(val)), end=get_sep(c, ncol), file = f) 55 | 56 | def report_procmon_sched_in_csv(args): 57 | # get stat 58 | (sched_sw_stat, sched_core_stat) = procmon_stat_sched(args) 59 | sched_sw_stat = [sched_sw_stat] 60 | col_names = 
["scope", "system"] 61 | 62 | # generate csv for system-wide state 63 | csv_name = get_csv_name(args, "sched", "sw") 64 | with open(csv_name, "w") as f: 65 | report_stat_in_csv(col_names, sched_sw_stat, f) 66 | 67 | if args.quiet == False: 68 | print("## Sched_wakeup count", file = sys.stdout) 69 | report_stat_in_csv(col_names, sched_sw_stat, sys.stdout) 70 | print("\n") 71 | 72 | # generate csv for per-core state 73 | csv_name = get_csv_name(args, "sched", "core") 74 | with open(csv_name, "w") as f: 75 | report_stat_in_csv(col_names, sched_core_stat, f) 76 | 77 | def report_procmon_cstate_in_csv(args): 78 | # get stat 79 | (cstate_sw_stat, cstate_core_stat) = procmon_stat_cstate(args) 80 | cstate_sw_stat = transpose_2d_list( [cstate_sw_stat] ) 81 | cstate_core_stat = transpose_2d_list(cstate_core_stat) 82 | 83 | # generate csv for system-wide state 84 | col_names = ["info", "system"] 85 | csv_name = get_csv_name(args, "cstate", "sw") 86 | with open(csv_name, "w") as f: 87 | report_stat_in_csv(col_names, cstate_sw_stat, f) 88 | 89 | if args.quiet == False: 90 | print("## Cstate states", file = sys.stdout) 91 | report_stat_in_csv(col_names, cstate_sw_stat, sys.stdout) 92 | print("\n") 93 | 94 | # generate csv for per-core state 95 | col_names = ["info"] + \ 96 | list( map(lambda c: "CPU" + str(c), range(MAX_NCPUS)) ) 97 | csv_name = get_csv_name(args, "cstate", "core") 98 | with open(csv_name, "w") as f: 99 | report_stat_in_csv(col_names, cstate_core_stat, f) 100 | 101 | def report_procmon_energy_in_csv(args): 102 | # get stat 103 | energy_sw_stat = transpose_2d_list( [procmon_stat_energy(args)] ) 104 | 105 | # generate csv for system-wide state 106 | col_names = ["energy", "system"] 107 | csv_name = get_csv_name(args, "energy", "sw") 108 | with open(csv_name, "w") as f: 109 | report_stat_in_csv(col_names, energy_sw_stat, f) 110 | 111 | if args.quiet == False: 112 | print("## Energy consumption", file = sys.stdout) 113 | report_stat_in_csv(col_names, energy_sw_stat, 
sys.stdout) 114 | print("\n") 115 | 116 | def report_procmon_perf_in_csv(args): 117 | # get stat 118 | perf_sw_stat = transpose_2d_list( [procmon_stat_perf(args)] ) 119 | 120 | # generate csv for system-wide state 121 | col_names = ["info", "system"] 122 | csv_name = get_csv_name(args, "perf", "sw") 123 | with open(csv_name, "w") as f: 124 | report_stat_in_csv(col_names, perf_sw_stat, f) 125 | 126 | if args.quiet == False: 127 | print("## Performance counters", file = sys.stdout) 128 | report_stat_in_csv(col_names, perf_sw_stat, sys.stdout) 129 | print("\n") 130 | 131 | def report_procmons_in_csv(args): 132 | # -s, --sched 133 | report_procmon_sched_in_csv(args) 134 | # -c, --cstate 135 | report_procmon_cstate_in_csv(args) 136 | # -e, --energy 137 | report_procmon_energy_in_csv(args) 138 | # -p, --perf 139 | report_procmon_perf_in_csv(args) 140 | 141 | def str_to_nstr(s): 142 | ns = map(lambda c: c if c != ',' and c != '%' else '', s) 143 | return "".join( list(ns) ) 144 | 145 | def procmon_stat_sched(args): 146 | sw_stat, core_stat = [0], [0] 147 | log = get_log_name(args, "sched") 148 | 149 | with open(log, 'r') as f: 150 | def get_kv(line): 151 | last_wd = line.split()[-1] 152 | toks = line.split()[-1].split('=') 153 | if len(toks) != 2: 154 | return ("", "") 155 | k, v = line.split()[-1].split('=') 156 | return (k, v) 157 | 158 | # read the first line, "cpus=16" and do sanity check 159 | # then initialize the per-core stat 160 | while True: 161 | k, v = get_kv(f.readline()) 162 | if k == "cpus": 163 | ncpus = int(v) 164 | break 165 | core_stat = [0] * ncpus 166 | 167 | # read the rest lines, "... 
target_cpu=006" 168 | for line in f: 169 | k, v = get_kv(line) 170 | if k != "target_cpu": 171 | continue 172 | cpu_id = int(v) 173 | core_stat[cpu_id] = core_stat[cpu_id] + 1 174 | 175 | # update the system-wide stat 176 | sw_stat[0] = sum(core_stat) 177 | 178 | # convert to a list of tuples 179 | sw_stat2 = list( map(lambda v: ("sched_wakeup", v), sw_stat) ) 180 | core_stat2= list( map(lambda kv: [("CPU" + str(kv[0]), kv[1])], \ 181 | list( zip(range(len(core_stat)), core_stat) )) ) 182 | return sw_stat2, core_stat2 183 | 184 | def procmon_stat_cstate(args): 185 | sw_stat, core_stat = [], [[]] * MAX_NCPUS 186 | log = get_log_name(args, "cstate") 187 | 188 | with open(log, 'r') as f: 189 | def tokenize(line): 190 | return line.replace('|', ' ').split() 191 | 192 | # get column names 193 | col_names = [] 194 | for line in f: 195 | tokens = tokenize(line) 196 | if tokens[0] == "CPU": 197 | for token in tokens: 198 | col_names.append(token) 199 | break 200 | 201 | # parse per-core stat 202 | for line in f: 203 | # read stat 204 | stat_dict = {} 205 | for (col_name, token) in zip(col_names, tokenize(line)): 206 | stat_dict[col_name] = token 207 | # rearragne stat 208 | def get_kv(dict, k): 209 | return (k, dict[k]) 210 | col_order = ('C0', 'POLL', 'C1', 'C2', 'C3', 'Freq') 211 | stat_list = [] 212 | for col in col_order: 213 | stat_list.append( (col, float(stat_dict[col])) ) 214 | stat_dict['CPU'] = int(stat_dict['CPU']) 215 | cpu_id = stat_dict['CPU'] 216 | core_stat[cpu_id] = stat_list 217 | ncpu = cpu_id + 1 218 | core_stat = core_stat[0:ncpu] 219 | 220 | # aggregate per-core state to system-wide stat 221 | sw_stat = core_stat[0].copy() 222 | for cs in core_stat[1:]: 223 | for pos, c in enumerate(cs): 224 | sk, sv = sw_stat[pos] 225 | ck, cv = c 226 | sw_stat[pos] = (sk, sv + cv) 227 | for pos, s in enumerate(sw_stat): 228 | sk, sv = sw_stat[pos] 229 | sw_stat[pos] = (sk, sv / float(ncpu)) 230 | 231 | return sw_stat, core_stat 232 | 233 | def parse_log_ef(log, 
parse_tbl): 234 | # parse the log 235 | stat = {} 236 | with open(log, 'r') as f: 237 | for line in f: 238 | for key, (pos, name) in parse_tbl: 239 | if line.find(key) >= 0: 240 | tokens = line.split() 241 | stat[name] = float(str_to_nstr(tokens[pos])) 242 | 243 | # sort statistics for easier interpretation 244 | stat2 = [] 245 | for key, (pos, name) in parse_tbl: 246 | stat2.append( (name, stat.get(name)) ) 247 | return stat2 248 | 249 | def procmon_stat_energy(args): 250 | parse_tbl = ( ("Joules", (2, "J")), 251 | ("seconds", (0, "__seconds")) ) 252 | 253 | log = get_log_name(args, "energy") 254 | stat = parse_log_ef(log, parse_tbl) 255 | # add energy (J/sec) 256 | sec = float(stat[1][1]) 257 | stat[1] = ("J/sec", stat[0][1]/sec) 258 | return stat 259 | 260 | def procmon_stat_perf(args): 261 | parse_tbl = ( ("seconds time elapsed", (0, "time (sec)")), 262 | ("cycles ", (0, "cycles")), 263 | ("instructions", (0, "instructions")), 264 | ("instructions", (3, "ipc")), 265 | ("stalled-cycles-frontend", (3, "frontend-stall (%)")), 266 | ("stalled-cycles-backend", (3, "backend-stall (%)")), 267 | ("branches", (0, "branches")), 268 | ("branch-misses", (3, "branch-misses (%)")), 269 | ("page-faults", (0, "page-faults")), 270 | ("context-switches", (0, "context-switches")), 271 | ("cpu-migrations", (0, "cpu-migrations")), ) 272 | 273 | log = get_log_name(args, "perf") 274 | stat = parse_log_ef(log, parse_tbl) 275 | return stat 276 | 277 | def get_cmd_options(argv): 278 | parser = argparse.ArgumentParser( 279 | prog = "procinsight", 280 | description = "Report CPU statistics and system-wide scheduling statistics",) 281 | parser.add_argument('-o', '--outdir', action='store', required=True, 282 | help='output directory') 283 | parser.add_argument('-l', '--log', action='store', required=True, 284 | help='log file prefix') 285 | parser.add_argument('-q', '--quiet', action='store_true', 286 | help='do not print result to stdout' ) 287 | args = parser.parse_args(argv) 288 | 
return args 289 | 290 | if __name__ == "__main__": 291 | args = get_cmd_options(sys.argv[1:]) 292 | report_procmons_in_csv(args) 293 | 294 | 295 | 296 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | `vapormark` is a benchmark framework developed for measuring various 2 | performance metrics (e.g., throughput, latency, and tail latency) and the 3 | process states (e.g., backend stall, energy consumption) while running a 4 | program on Linux. It especially targets `SteamOS` -- a Linux-based gaming 5 | device -- but most features are generically useful in regular Linux 6 | environments. 7 | 8 | 9 | Three Phases 10 | ------------ 11 | `vapormark` consists of three phases: 12 | 1. running a benchmark (i.e., collecting performance data) 13 | 2. analyzing the collected data 14 | 3. generating a report. 15 | 16 | Only the first step should run on a target device, such as `SteamDeck`. The 17 | others can be run on almost any Linux box.
18 | 19 | External Dependencies 20 | --------------------- 21 | `vapormark` uses the following in each phase: 22 | - running a benchmark (i.e., collecting performance data) 23 | - [schbench](https://kernel.googlesource.com/pub/scm/linux/kernel/git/mason/schbench/), 24 | [stress-ng](https://github.com/ColinIanKing/stress-ng), and 25 | [gbench](https://github.com/Igalia/vapormark/tree/main/micro-bench/gbench) 26 | for micro-benchmarking of scheduler performance 27 | - [MangoHud](https://github.com/flightlessmango/MangoHud) for measuring 28 | FPS (frame per second) during a running game 29 | - `strace`, `trace-cmd`, `cpupower`, `turbostat`, `chcpu`, `taskset`, and `perf` for collecting processor states 30 | - analyzing the collected data 31 | - `matplotlib`, `graphviz`, and `numpy` python library for generating graphs 32 | - generating a report 33 | - `pandoc` for generating a report in HTML format 34 | - for all phases 35 | - `python3` 36 | 37 | Installation 38 | ------------ 39 | Just clone this repository and hit `make` on the top of the directory. The 40 | build procedure will clone and build `schbench`. All the binaries will be put 41 | under the `vapormark/bin` directory. 42 | 43 | If you want to measure FPS, install `MangoHud`. For `SteamDeck`, please refer to 44 | the following steps: 45 | 1. Press `STEAM` button then choose `Power -> Switch to Desktop` 46 | 2. On Plasma Desktop, launch `Discover Center`. Then find and install `MangoHud` 47 | 3. Copy the MangoHud configuration file, `vapormark/config/MangoHud.conf` to 48 | `/home/deck/.config/MangoHud/MangoHud.conf`. This is the minimal MangoHud 49 | configuration that `vapormark` expects. MangoHud logs will be stored to 50 | the `/home/deck/mangologs-vapormark/` directory.
51 | 52 | Running a benchmark and collecting performance data 53 | --------------------------------------------------- 54 | 55 | #### `scmon`: collecting system usage of a process tree 56 | 57 | `scmon` collects a system call usage of a process tree. It generates per-task 58 | system call trace file under `OUTDIR` with prefix `LOG` and suffix `-scmon.*`. 59 | It imposes noticeable performance overhead so it should not be used when 60 | collecting performance numbers. `scmon` is useful to understand the high-level 61 | behavioral traits of an application. 62 | 63 | ``` 64 | usage: scmon [-h] -o OUTDIR -l LOG [-p PID] [-r ROOT] [-n NAME] [-c CMD [CMD ...]] 65 | 66 | Collect system call usage statistics of a program 67 | 68 | options: 69 | -h, --help show this help message and exit 70 | -o OUTDIR, --outdir OUTDIR 71 | output directory 72 | -l LOG, --log LOG log file prefix 73 | -p PID, --pid PID process id to monitor 74 | -r ROOT, --root ROOT root process id to monitor (all decendents will be monitored) 75 | -n NAME, --name NAME name of a process to monitor 76 | -c CMD [CMD ...], --cmd CMD [CMD ...] 77 | command to execute 78 | 79 | For example, 'scmon -o log -l steam -n steam' to log the system call usage of 'steam' 80 | and all its decendents under log/steam*-scmon*. 81 | ``` 82 | 83 | #### `procmon`: collecting processor and scheduling statistics 84 | 85 | `procmon` collects four types of information: 1) scheduler's wakeup events, 2) 86 | CPU's c-state, 3) CPU's energy consumption, and 4) processor's performance 87 | monitoring data (e.g., instruction per cycle). Similar to `scmon`, it generates 88 | logs under `OUTDIR` with prefix `LOG` and suffix `-procmon.*`. It collects 89 | information while it runs. The runtime overhead is not marginal so it can be 90 | run with an application level benchmark (like game). However, it is not 91 | recommended with a micro-benchmark (`schdbench`), which is much more sensitive 92 | to any noises. 
93 | 94 | ``` 95 | usage: procmon [-h] -o OUTDIR -l LOG [-s] [-c] [-e] [-p] [-a] 96 | 97 | Collect CPU statistics and system-wide scheduling statistics 98 | 99 | options: 100 | -h, --help show this help message and exit 101 | -o OUTDIR, --outdir OUTDIR 102 | output directory 103 | -l LOG, --log LOG log file prefix 104 | -s, --sched trace wake-up events of process scheduler 105 | -c, --cstate trace c-state of all CPUs 106 | -e, --energy trace energy consumption of all CPUs 107 | -p, --perf trace performance statistics of all CPUs 108 | -a, --all trace all statistics 109 | 110 | procmon internally uses 'trace-cmd', 'cpupower', and 'perf'. 111 | ``` 112 | 113 | #### `mbench`: running a micro-benchmark 114 | `mbench` is a wrapper which runs `schbench` with a pre-configured settings. For 115 | convenience, it launches `procmon` if necessary. However, to get accurate 116 | performance results, it is recommended `mbench` with and without `procmon`. 117 | Also, make sure there is no heavy background tasks: for example, in `SteamOS`, 118 | `steam`, `mangoapp`, `gamemoded`, `gamescope`, and `steamwebhelper`. 
119 | 120 | ``` 121 | usage: mbench [-h] -o OUTDIR -l LOG [-b BG] [-f FG] [-c CONFIG] [-r RUNTIME] [-p] 122 | 123 | Run a micro-benchmark with a pre-configured setting 124 | 125 | options: 126 | -h, --help show this help message and exit 127 | -o OUTDIR, --outdir OUTDIR 128 | output directory 129 | -l LOG, --log LOG log file prefix 130 | -b BG, --bg BG command line of a background task 131 | -f FG, --fg FG command line of a foreground task for benchmarking 132 | -c CONFIG, --config CONFIG 133 | run a benchmark with preconfigured setting: `schbench50`, 134 | `schbench100`, and `schbench200`, each of which runs `schbench` with 135 | 50%, 100%, and 200% CPU utilization, respectively 136 | -r RUNTIME, --runtime RUNTIME 137 | benchmark running time in seconds (default = 90sec) 138 | -p, --procmon run with profiling on 139 | 140 | Performance monitoring (-p) WILL interfere the results of micro-benchmark. Do NOT use -p when 141 | you collect performance results. Instead, run the same benchmark twice: one without profiling 142 | for performance comparison and another with profiling for analysis. Also, make sure there is 143 | no heavy background task running. 144 | ``` 145 | 146 | #### `MangoHud`: measuring FPS, CPU/GPU utilization, etc. 147 | Launching, starting, and stopping `MangoHud` is not integrated with 148 | `vapormark`. Hence `vapormark` just follows the standard `MangoHud` usage. 149 | Especially in `SteamDeck`, please refer to the following procedure: 150 | 151 | - For a game to FPS logging, go to `Properties -> General -> Launch Options` 152 | and add `mangohud %command%`. *The game must be launched in **Desktop Mode 153 | (not in Gaming Mode)** to log FPS and other system stats.* 154 | 155 | - Now, you will see the overlay window showing FPS when launching the game. You 156 | can start and stop FPS logging by hitting `Shift_L+F2`. The log will be stored 157 | at `/home/deck/mangologs-vapormark`. Some games hang when MangoHud is enabled. 
158 | Other useful MangoHud shortcuts are as follows: 159 | 160 | ``` 161 | Shift_L+F2 : Toggle Logging 162 | Shift_L+F4 : Reload Config 163 | Shift_R+F12 : Toggle Hud 164 | ``` 165 | 166 | - Once you finish FPS logging by hitting `Shift_L+F2`, `MangoHud` will generate 167 | a `csv` log file under `/home/deck/mangologs-vapormark`. Please copy and 168 | rename it ending with `-mangohud.csv` for analysis and report generation. 169 | 170 | 171 | - Following games provide in-game benchmarks: 172 | 173 | | Game | How to start an in-game benchmark | 174 | | :--------------------- | :-------------------------------- | 175 | | Far Cry: New Dawn | `Options -> Benchmark` | 176 | | A Total War Saga: Troy | `Options -> Graphics -> Advanced -> Benchmark` | 177 | | Cyber Punk 2077 | `Settings -> Graphics -> Quick Preset, Run Benchmark` | 178 | | Factorio | On terminal: `factorio --benchmark` [map.zip](https://factoriobox.1au.us/map/download/91c009e61f44c3c532f7152b0501ea0fc920723148dd1c38c4da129eb9d399f9.zip) `--benchmark-ticks 1000 --disable-audio` | 179 | 180 | 181 | #### `schedmon`: collecting the detailed scheduling activities 182 | `schedmon` collects the detailed system-wide scheduling activities. It 183 | internally relies on `perf sched record` command. To collect the kernel 184 | symbol names correctly, please run the following. 185 | 186 | ``` 187 | $> echo 1 > /proc/sys/kernel/kptr_restrict 188 | ``` 189 | 190 | 191 | ``` 192 | usage: schedmon [-h] -o OUTDIR -l LOG 193 | 194 | Collect the detailed scheduler activity internally using `perf sched` 195 | 196 | options: 197 | -h, --help show this help message and exit 198 | -o OUTDIR, --outdir OUTDIR 199 | output directory 200 | -l LOG, --log LOG log file prefix 201 | ``` 202 | 203 | #### `energyprof`: collecting energy and performance statistics according to CPU load 204 | `energyprof` measures energy consumption and performance while running 205 | `stress-ng` with various CPU loads and various numbers of online CPUs. 
It 206 | internally relies on `turbostat`, `stress-ng`, `chcpu`, `taskset`, and `perf`. 207 | 208 | ``` 209 | usage: energyprof [-h] -c NUM_CPUS [-t TIME_SEC] -o OUTDIR -l LOG 210 | 211 | Collect energy and performance statistics according to CPU load 212 | 213 | options: 214 | -h, --help show this help message and exit 215 | -c NUM_CPUS, --num_cpus NUM_CPUS 216 | number of CPUs of this machine 217 | -t TIME_SEC, --time_sec TIME_SEC 218 | time in sec to run stress-_ng 219 | -o OUTDIR, --outdir OUTDIR 220 | output directory 221 | -l LOG, --log LOG log file prefix 222 | ``` 223 | 224 | Analyzing the collected data 225 | ---------------------------- 226 | Once the performance data is collected, it is time to analyze the results. In 227 | this phase, `vapormark` transforms various log files into the standard CSV 228 | format and produces the latency distribution graphs. Specifically, it provides 229 | the following commands. The generated files have a suffix of its program, 230 | `*-scinsight*`, `*-procinsight*`, `*-ginsight*`, and `*-energyprof*`. 
231 | 232 | #### `scinsight`: analyzing `scmon` logs 233 | ``` 234 | usage: scinsight [-h] -o OUTDIR -l LOG [-q] 235 | 236 | Report system call usage statistics of a program 237 | 238 | options: 239 | -h, --help show this help message and exit 240 | -o OUTDIR, --outdir OUTDIR 241 | output directory 242 | -l LOG, --log LOG log file prefix, or path to directory containing log files 243 | -q, --quiet do not print result to stdout 244 | ``` 245 | 246 | 247 | 248 | #### `procinsight`: analyzing `procmon` logs 249 | ``` 250 | usage: procinsight [-h] -o OUTDIR -l LOG [-q] 251 | 252 | Report CPU statistics and system-wide scheduling statistics 253 | 254 | options: 255 | -h, --help show this help message and exit 256 | -o OUTDIR, --outdir OUTDIR 257 | output directory 258 | -l LOG, --log LOG log file prefix 259 | -q, --quiet do not print result to stdout 260 | ``` 261 | 262 | 263 | 264 | #### `ginsight`: analyzing a `MangoHud` log 265 | ``` 266 | usage: ginsight [-h] -l LOG -o OUTDIR -p PREFIX [-q] 267 | 268 | Generarte a report from MangoHud log 269 | 270 | options: 271 | -h, --help show this help message and exit 272 | -l LOG, --log LOG MangoHud log file in a CSV format 273 | -o OUTDIR, --outdir OUTDIR 274 | output directory 275 | -p PREFIX, --prefix PREFIX 276 | output file prefix 277 | -q, --quiet do not print result to stdout 278 | ``` 279 | 280 | #### `schedinsight`: analyzing the results of `schedmon` 281 | 282 | ``` 283 | usage: schedinsight [-h] -l LOGDIR -p PREFIX -o OUTPUT [-i IMGTYPE] [-k] [-s MINSCHED] [-t TIMELIMIT] 284 | 285 | Report the detailed analysis of scheduliing activities collected by `perf sched record` 286 | 287 | options: 288 | -h, --help show this help message and exit 289 | -l LOGDIR, --logdir LOGDIR 290 | a log directory 291 | -p PREFIX, --prefix PREFIX 292 | log file prefix 293 | -o OUTPUT, --output OUTPUT 294 | a target report file name in markdown format 295 | -i IMGTYPE, --imgtype IMGTYPE 296 | type of image format (png, svg) 297 | -k, 
--pickle use pickle whenever possible 298 | -s MINSCHED, --minsched MINSCHED 299 | set the minimum number of schedules for task analysis 300 | -t TIMELIMIT, --timelimit TIMELIMIT 301 | time limit to draw a graph in seconds 302 | ``` 303 | 304 | #### `energyinsight`: analyzing the results of `energyprof` 305 | 306 | ``` 307 | usage: energyinsight [-h] -o OUTDIR -l LOG [-q] 308 | 309 | Report energy usage per CPU load and number of online CPUs 310 | 311 | options: 312 | -h, --help show this help message and exit 313 | -o OUTDIR, --outdir OUTDIR 314 | output directory 315 | -l LOG, --log LOG output log file prefix 316 | -q, --quiet do not print result to stdout 317 | ``` 318 | 319 | 320 | 321 | Generating a (comparison) report 322 | -------------------------------- 323 | 324 | `vapormark` provides a reporting feature that compares the results of multiple 325 | configurations. This is especially useful when checking the impact of a certain 326 | optimization. When more than one log directories are given (with multiple -l 327 | options), `report` uses the logs in the first directory as a baseline and shows 328 | the relative delta in percent. Note that `report` does not support the 329 | comparison of `schedmon` logs yet. 330 | 331 | ``` 332 | usage: report [-h] -l LOGDIR -p PREFIX -o OUTPUT [-f] [-g] 333 | 334 | Generate a report of given log directories 335 | 336 | options: 337 | -h, --help show this help message and exit 338 | -l LOGDIR, --logdir LOGDIR 339 | a log directory. When mulltiple `-l` options are given, comparison 340 | will be reported using the first one as a baseline. 
341 | -p PREFIX, --prefix PREFIX 342 | log file prefix for report generation 343 | -o OUTPUT, --output OUTPUT 344 | target report file name in markdown format 345 | -f, --force force to regenerate all CSV files 346 | -g, --debug print out debug messages 347 | 348 | For example, `report -l base_dir -l cmp_dir -p game1 -o report.md` compares `game1` logs 349 | in two directories -- `base_dir` and `cmp_dir` -- and generates `report.md`. `base_dir` 350 | is used in calculating the relative difference. When only one log directory is given, 351 | only the summary of results without comparison is provided. It expects certain file 352 | extensions: `*.factorio_out` for factorio benchmark and `*.schbench_out` for schbench 353 | benchmark. 354 | ``` 355 | 356 | Misc tools 357 | ---------- 358 | 359 | #### `sched-config`: save and restore key scheduler parameters from debugfs 360 | 361 | ``` 362 | usage: sched-config [-h] [-g GET] [-s SET] 363 | 364 | Set or get the scheduler config parameters 365 | 366 | options: 367 | -h, --help show this help message and exit 368 | -g GET, --get GET Get the scheculer parameters 369 | -s SET, --set SET Set the scheculer parameters 370 | sched-config: error: either '-g' or '-s' should be specified 371 | ``` 372 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. 
By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. 
If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | JoulesWatch 294 | Copyright (C) 2023 kernel-research 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /micro-bench/gbench/gbench.c: -------------------------------------------------------------------------------- 1 | /* 2 | * gbench.c 3 | * 4 | * Copyright (C) 2023 Igalia 5 | * Changwoo Min 6 | * 7 | * GPLv2, portions copied from schbench (and potentially from kernel and fio) 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | enum { 28 | IPC_FUTEX, 29 | IPC_PIPE_EPOLL, 30 | IPC_SOCK_SELECT, 31 | 32 | IPC_MAX, 33 | }; 34 | 35 | enum { 36 | USEC_PER_SEC = 1000000L, 37 | }; 38 | 39 | static const char *ipc_str[3] = { 40 | "futex", "pipe", "sock", 41 | }; 42 | 43 | struct opt; 44 | 45 | union pipe_fds { 46 | int fds[2]; 47 | struct { 48 | int rfd; 49 | int wfd; 50 | }; 51 | }; 52 | 53 | struct pipe_pair { 54 | union pipe_fds rx; 55 | union pipe_fds tx; 56 | }; 57 | 58 | #define MAX_EPOLL_EVENTS 64 59 | 60 | struct epoll_ipc { 61 | int fd; 62 | int nfds; 63 | struct epoll_event events[MAX_EPOLL_EVENTS]; 64 | }; 65 | 66 | union ipc { 67 | int futex; 68 | struct pipe_pair pipe; 69 | struct epoll_ipc epoll; 70 | }; 71 | 72 | #define MAIN_ID (-1) 73 | 74 | struct task_stat { 75 | __u64 cnt; 76 | __u64 avg_run_time; 77 | __u64 frq_run_time; 78 | __u64 avg_wait_time; 79 | __u64 frq_wait_time; 80 | }; 81 | 82 | struct task_data { 83 | struct opt * opt; 84 | pthread_t tid; 85 | pid_t 
/*
 * wait on a futex, with an optional timeout.  Make sure to set
 * the futex to FUTEX_BLOCKED beforehand.
 *
 * This will return zero if all went well, or return -ETIMEDOUT if you
 * hit the timeout without getting posted.
 */
static int fwait(int *futexp, struct timespec *timeout)
{
	int s;
	while (1) {
		/*
		 * Is the futex available?  A poster (fpost) flips it from
		 * FUTEX_BLOCKED to FUTEX_RUNNING; we claim the wakeup by
		 * flipping it back to FUTEX_BLOCKED for the next round.
		 */
		if (__sync_bool_compare_and_swap(futexp, FUTEX_RUNNING,
						 FUTEX_BLOCKED)) {
			break;	/* Yes */
		}
		/*
		 * Futex is not available; wait.  EAGAIN means the value
		 * already changed under us (a post raced in), so just
		 * retry the CAS above rather than treating it as an error.
		 */
		s = futex(futexp, FUTEX_WAIT_PRIVATE, FUTEX_BLOCKED, timeout, NULL, 0);
		if (s == -1 && errno != EAGAIN) {
			if (errno == ETIMEDOUT)
				return -ETIMEDOUT;
			perror("futex-FUTEX_WAIT");
			exit(1);
		}
	}
	return 0;
}
231 | } 232 | 233 | static __u64 calc_avg_freq(__u64 old_freq, __u64 interval) 234 | { 235 | __u64 new_freq, ewma_freq; 236 | 237 | new_freq = USEC_PER_SEC / interval; 238 | ewma_freq = calc_avg(old_freq, new_freq); 239 | 240 | return ewma_freq; 241 | } 242 | 243 | static void update_stat(struct task_data *t, __u64 wait_int, __u64 run_dur) 244 | { 245 | struct task_stat *s = &t->stat; 246 | 247 | s->avg_run_time = calc_avg(s->avg_run_time, run_dur); 248 | s->frq_run_time = calc_avg_freq(s->frq_run_time, run_dur+wait_int); 249 | 250 | s->avg_wait_time = calc_avg(s->avg_wait_time, wait_int); 251 | s->frq_wait_time = calc_avg_freq(s->frq_wait_time, run_dur+wait_int); 252 | } 253 | 254 | static void do_work(struct task_data *t) 255 | { 256 | __u64 wait_interval, run_duration; 257 | 258 | /* do some computation */ 259 | wait_interval = start_tick(); 260 | do { 261 | do_some_math(t); 262 | } while (t->run_time >= (run_duration = get_cur_tick())); 263 | 264 | /* update statistics */ 265 | update_stat(t, wait_interval, run_duration); 266 | } 267 | 268 | static void worker_create_ipc(struct task_data *w) 269 | { 270 | int ret = 0; 271 | 272 | switch(w->opt->ipc_type) { 273 | case IPC_FUTEX: 274 | /* do nothing */ 275 | break; 276 | case IPC_PIPE_EPOLL: 277 | /* create a pair of pipe -- rx and tx */ 278 | ret = pipe(w->ipc.pipe.rx.fds); 279 | if (ret == -1) { 280 | perror("failed to create an rx pipe\n"); 281 | exit(1); 282 | } 283 | ret = pipe(w->ipc.pipe.tx.fds); 284 | if (ret == -1) { 285 | perror("failed to create a tx pipe\n"); 286 | exit(1); 287 | } 288 | break; 289 | case IPC_SOCK_SELECT: 290 | break; 291 | default: 292 | fprintf(stderr, "incorrect ipc type: %d\n", w->opt->ipc_type); 293 | exit(1); 294 | break; 295 | } 296 | } 297 | 298 | static void worker_ping_pong_futex(struct task_data *w, struct task_data *m) 299 | { 300 | /* set myself to blocked */ 301 | w->ipc.futex = FUTEX_BLOCKED; 302 | 303 | /* let the main know */ 304 | fpost(&m->ipc.futex); 305 | 306 | /* 
/*
 * One pipe-based heartbeat round-trip from a worker: write our id on the
 * tx pipe so the main thread sees us, then block reading the rx pipe
 * until the main thread writes the pong back.
 */
static void worker_ping_pong_pipe(struct task_data *w, struct task_data *m)
{
	int wr_id = w->id;

	/* let the main know: the id tells main which worker to pong */
	ssize_t w_ret = write(w->ipc.pipe.tx.wfd, &wr_id, sizeof(wr_id));
	if (w_ret != sizeof(wr_id)) {
		perror("worker write failed");
		exit(1);
	}

	/*
	 * don't wait if the main thread is shutting down.  On the
	 * stopping path, main writes one last pong to every worker's rx
	 * pipe, so as long as main walks its worker list after setting
	 * stopping, we shouldn't block here forever.
	 */
	if (!stopping) {
		/* if main hasn't already ponged us, this read blocks */
		ssize_t r = read(w->ipc.pipe.rx.rfd, &wr_id, sizeof(wr_id));
		if (r != sizeof(wr_id)) {
			perror("worker read failed");
			exit(1);
		}
	}
}
/*
 * Pipe/epoll side of the main thread's heartbeat: answer every worker
 * ping collected by the previous epoll_wait, then wait for the next
 * batch of pings.
 *
 * Returns 1 when the benchmark is stopping (after ponging every worker
 * one last time so none of them blocks in read), 0 otherwise.
 */
static
int main_ping_pong_pipe(struct task_data *m, struct task_data *w, int nr_w)
{
	struct epoll_ipc *epoll = &m->ipc.epoll;
	int wr_id;

	/* unblock workers that pinged during the previous epoll_wait */
	for (int i = 0; i < epoll->nfds; i++) {
		/* read a ping message (the worker's id) from a worker */
		ssize_t r = read(epoll->events[i].data.fd, &wr_id, sizeof(wr_id));
		if (r != sizeof(wr_id)) {
			perror("read failed");
			exit(1);
		}

		/* send a pong message back to that worker's rx pipe */
		ssize_t w_ret = write(w[wr_id].ipc.pipe.rx.wfd, &wr_id, sizeof(wr_id));
		if (w_ret != sizeof(wr_id)) {
			perror("write failed");
			exit(1);
		}
	}

	/*
	 * On shutdown, pong every worker unconditionally so workers
	 * blocked in read() get released, then tell the caller to stop.
	 */
	if (stopping) {
		for (int i = 0; i < nr_w; i++) {
			wr_id = i;
			ssize_t w_ret = write(w[i].ipc.pipe.rx.wfd, &wr_id, sizeof(wr_id));
			if (w_ret != sizeof(wr_id)) {
				perror("write failed (stopping)");
				exit(1);
			}
		}
		return 1;
	}

	/*
	 * wait for the next ping from any worker; the 100ms timeout lets
	 * us re-check the stopping flag periodically
	 */
	while (!stopping) {
		m->ipc.epoll.nfds = epoll_wait(m->ipc.epoll.fd,
					       m->ipc.epoll.events,
					       MAX_EPOLL_EVENTS,
					       100);
		switch (m->ipc.epoll.nfds) {
		case 0: /* time out then retry */
			__sync_synchronize();
			break;
		case -1: /* error */
			perror("failed to epoll_wait\n");
			exit(1);
			return 1; /* NOTE(review): unreachable after exit(1) */
		default: /* got some messages; handled on the next call */
			return 0;
		}
	}

	return 0;
}
/*
 * Body of the main (hub) thread: create all worker threads and their
 * IPC endpoints, run the ping-pong/compute/sleep loop until the
 * stopping flag is raised, then join every worker.
 *
 * @arg: struct opt * with the parsed benchmark configuration.
 */
static void *main_thr(void *arg)
{
	struct opt *opt = arg;
	struct task_data *m = &opt->main;
	struct task_data *w = opt->workers;
	int nr_w = opt->nr_workers;
	int i;

	/* init main id */
	m->pid = gettid();
	m->id = MAIN_ID;

	/* launch workers; each gets its IPC set up before it starts */
	for (i = 0; i < opt->nr_workers; i++) {
		int ret;

		/* Create a worker thread. */
		w[i].id = i;
		worker_create_ipc(&w[i]);
		ret = pthread_create(&w[i].tid, NULL, worker_thr, &w[i]);
		if (ret) {
			fprintf(stderr, "error %d from pthread_create\n", ret);
			exit(1);
		}
	}

	/*
	 * init ipc for the main thread -- must happen after the workers'
	 * pipes exist, since the epoll set registers their tx read fds
	 */
	main_create_ipc(m);

	/* do its work */
	for (m->stat.cnt = 0; 1; m->stat.cnt++) {
		debug("main[%lx] = %llu\n", m->tid, m->stat.cnt);

		/* exchange a heartbeat signal; non-zero means stopping */
		if (main_ping_pong(m, w, nr_w))
			break;

		/* do some computation (emulated cache footprint) */
		do_work(m);

		/* sleep for a while to emulate the configured duty cycle */
		usleep(m->wait_time);
	}

	/* now, it's time to finish. wait for workers. */
	for (i = 0; i < opt->nr_workers; i++)
		pthread_join(w[i].tid, NULL);

	return NULL;
}
*/ 594 | ret = pthread_create(&m->tid, NULL, main_thr, opt); 595 | if (ret) { 596 | fprintf(stderr, "error %d from pthread_create\n", ret); 597 | exit(1); 598 | } 599 | } 600 | 601 | static void print_usage(void) 602 | { 603 | fprintf(stderr, "gbench usage:\n" 604 | "\t-i (--ipc): ipc type: futex, pipe, socket (def: futex)\n" 605 | "\t-s (--star): workers communicates only through a main threads\n" 606 | "\t specify 'r1:w1,r2:w2' where r1 and r2 are run time;\n" 607 | "\t w1 and w2 are wait time for each worker thread.\n" 608 | "\t time units are all usec.\n" 609 | "\t-t (--time): benchmark time in seconds (def: 60)\n" 610 | "\t-F (--cache_footprint): cache footprint (kb, def: 256)\n" 611 | ); 612 | exit(1); 613 | } 614 | 615 | static int get_nr_toks(char *s) 616 | { 617 | int nr = 0; 618 | 619 | for (; *s != '\0'; s++) { 620 | if (*s == ',') 621 | nr++; 622 | } 623 | return nr + 1; 624 | } 625 | 626 | static __u64 *alloc_data(struct task_data *t) 627 | { 628 | int matrix_size = get_matrix_size(t->opt); 629 | 630 | return malloc(3 * sizeof(__u64) * matrix_size * matrix_size); 631 | } 632 | 633 | static int parse_subopt_s(struct opt *opt, char *s) 634 | { 635 | struct task_data *m = &opt->main; 636 | struct task_data *w; 637 | int num = get_nr_toks(s); 638 | char *t; 639 | int i; 640 | 641 | /* parse run time and wait time for main */ 642 | t = strtok(s, ",:"); 643 | m->run_time = atol(t); 644 | t = strtok(NULL, ",:"); 645 | m->wait_time = atol(t); 646 | m->opt = opt; 647 | 648 | /* alloc workers array */ 649 | opt->nr_workers = num - 1; 650 | opt->workers = w = calloc(num - 1, sizeof(struct task_data)); 651 | if (!opt->workers) 652 | return -ENOMEM; 653 | for (i = 0; i < opt->nr_workers; i++) { 654 | w[i].opt = opt; 655 | w[i].data = alloc_data(&w[i]); 656 | if (!w[i].data) 657 | return -ENOMEM; 658 | } 659 | 660 | /* parse run time and wait time for each worker */ 661 | for (i = 0; i < opt->nr_workers && t; i++) { 662 | t = strtok(NULL, ",:"); 663 | w[i].run_time 
= atol(t); 664 | t = strtok(NULL, ",:"); 665 | w[i].wait_time = atol(t); 666 | } 667 | 668 | return 0; 669 | } 670 | 671 | static int parse_ipc_type(char *s) 672 | { 673 | for (int i = 0; i < IPC_MAX; i++) { 674 | if (strcmp(ipc_str[i], s) == 0) 675 | return i; 676 | } 677 | 678 | return -EINVAL; 679 | } 680 | 681 | static int parse_options(struct opt *opt, int argc, char **argv) 682 | { 683 | char *option_string = "i:s:t:F:h"; 684 | static struct option long_options[] = { 685 | {"ipc", required_argument, 0, 'i'}, 686 | {"time", required_argument, 0, 't'}, 687 | {"star", required_argument, 0, 's'}, 688 | {"cache_footprint", required_argument, 0, 'F'}, 689 | {"help", no_argument, 0, 'h'}, 690 | {0, 0, 0, 0} 691 | }; 692 | int c, ret;; 693 | 694 | /* init opt to default values */ 695 | memset(opt, 0, sizeof(*opt)); 696 | opt->ipc_type = IPC_FUTEX; 697 | opt->cache_footprint_kb = 256; 698 | opt->benchmark_time_sec = 60; 699 | 700 | /* parse options */ 701 | while (1) { 702 | int option_index = 0; 703 | 704 | c = getopt_long(argc, argv, option_string, 705 | long_options, &option_index); 706 | if (c == -1) 707 | break; 708 | 709 | switch(c) { 710 | case 'i': 711 | ret = parse_ipc_type(optarg); 712 | if (ret < 0 ) 713 | return ret; 714 | opt->ipc_type = ret; 715 | break; 716 | case 't': 717 | opt->benchmark_time_sec = atoi(optarg); 718 | break; 719 | case 's': 720 | ret = parse_subopt_s(opt, optarg); 721 | if (ret) 722 | return ret; 723 | break; 724 | case 'F': 725 | opt->cache_footprint_kb = atoi(optarg); 726 | break; 727 | default: 728 | print_usage(); 729 | break; 730 | } 731 | } 732 | 733 | /* further initialize the main */ 734 | opt->main.data = alloc_data(&opt->main); 735 | if (!opt->main.data) 736 | return -ENOMEM; 737 | 738 | /* sanity check */ 739 | if (opt->nr_workers < 1) 740 | print_usage(); 741 | 742 | return 0; 743 | } 744 | 745 | static void stop_benchmark(struct opt *opt) 746 | { 747 | struct task_data *m = &opt->main; 748 | 749 | /* full memory barrier 
*/ 750 | __sync_synchronize(); 751 | 752 | /* then update it atomically */ 753 | __sync_bool_compare_and_swap(&stopping, 0, 1); 754 | 755 | /* finally waiting for the termination of the main */ 756 | fpost(&m->ipc.futex); 757 | pthread_join(m->tid, NULL); 758 | } 759 | 760 | static void show_results(struct opt *opt) 761 | { 762 | struct task_data *m = &opt->main; 763 | struct task_data *w = opt->workers; 764 | int nr_w = opt->nr_workers; 765 | 766 | printf("# thread\t %10s %10s %10s %10s %10s %10s %10s\n", 767 | "run_t", "run_a", "run_f", "wait_t", "wait_a", "wait_f", "cnt"); 768 | printf("main-thr[%d]\t %10lld %10lld %10lld " 769 | "%10lld %10lld %10lld %10lld\n", 770 | m->pid, 771 | m->run_time, m->stat.avg_run_time, m->stat.frq_run_time, 772 | m->wait_time, m->stat.avg_wait_time, m->stat.frq_wait_time, 773 | m->stat.cnt); 774 | 775 | for (int i = 0; i < nr_w; i++) { 776 | printf("worker[%d]-%d\t %10lld %10lld %10lld " 777 | "%10lld %10lld %10lld %10lld\n", 778 | w[i].pid, i, w[i].run_time, w[i].stat.avg_run_time, 779 | w[i].stat.frq_run_time, w[i].wait_time, 780 | w[i].stat.avg_wait_time, w[i].stat.frq_wait_time, 781 | w[i].stat.cnt); 782 | } 783 | } 784 | 785 | static void init(void) 786 | { 787 | /* init base time for overflow-free time calculation */ 788 | gettimeofday(&base_time, NULL); 789 | } 790 | 791 | int main(int argc, char **argv) 792 | { 793 | struct opt opt; 794 | 795 | init(); 796 | parse_options(&opt, argc, argv); 797 | launch_main_thr(&opt); 798 | sleep(opt.benchmark_time_sec); 799 | stop_benchmark(&opt); 800 | show_results(&opt); 801 | 802 | return 0; 803 | } 804 | -------------------------------------------------------------------------------- /bin/report: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import subprocess 5 | import glob 6 | import csv 7 | import datetime 8 | import argparse 9 | 10 | dbg_prt = False 11 | cur_dir = 
class stat_item:
    """One category of benchmark statistics plus its %-difference vs a baseline."""

    def __init__(self):
        # Information is stored in a list of tuples, each of which is a pair
        # of a property name (e.g., "median FPS") and its value (e.g., "60").
        self.stat = None

        # `diff` is a list of performance differences against a baseline in %.
        # It is a list of floats, in the same order as `stat`.
        self.diff = None

    def calc_diff(self, baseline):
        """Fill self.diff with the %-delta of each entry of self.stat over baseline.stat.

        Does nothing when either side has no data.  Comparing an item
        against itself yields all-zero deltas.
        """
        # sanity check (idiom fix: `is None` instead of `== None`)
        if self.stat is None or baseline.stat is None:
            return

        # self difference is always a 0% difference
        # (identity check made explicit with `is`; the class defines no
        # __eq__, so the original `==` was identity anyway)
        if self is baseline:
            self.diff = [0.0] * len(self.stat)
            return

        # calc delta over the baseline
        self.diff = []
        for (b, c) in zip(baseline.stat, self.stat):
            b_v = float(b[1])
            c_v = float(c[1])
            if b_v == c_v:
                d = 0.0
            else:
                # nudge an exactly-zero baseline to avoid division by zero
                if b_v == 0.0:
                    b_v = b_v + sys.float_info.min
                d = ((c_v - b_v) / b_v) * 100.0
            self.diff.append(d)

        # ROBUSTNESS: tolerant lookup of the module-level debug flag;
        # `global dbg_prt` + `if dbg_prt:` raised NameError when absent.
        if globals().get("dbg_prt", False):
            print(self.diff)
wakeup_lat (usec) 92 | # - min, 50.0th, 90.0th, 99.0th, 99.9th, max 93 | self.wakeup_lat = stat_item() 94 | # * req_lat (usec) 95 | # - min, 50.0th, 90.0th, 99.0th, 99.9th, max 96 | self.req_lat = stat_item() 97 | # * throughput (request per second) 98 | # - min, 20.0th, average, 50.0th, 90.0th, max 99 | self.rps= stat_item() 100 | 101 | # procinsight 102 | # ----------- 103 | # * CPU power state 104 | # - C0, poll, C1, c2, c3 105 | self.cpu_pwr = stat_item() 106 | # * Clock 107 | # - freq 108 | self.clock_freq = stat_item() 109 | # * Energy 110 | # - J, J/sec 111 | self.energy = stat_item() 112 | # * processor 113 | # - ipc, front-end stall (%), back-end stall (%), page faults 114 | self.processor = stat_item() 115 | # * scheduling 116 | # - context_switches, cpu-migrations, sched_wakeup 117 | self.sched = stat_item() 118 | 119 | def exec_insight(self, c, a): 120 | global cur_dir 121 | cmd = os.path.abspath(os.path.join(cur_dir, c)) + " " + a 122 | print_log("Running %s" % cmd) 123 | p = subprocess.Popen(cmd, shell=True, stdout=None, stderr=None) 124 | p.wait() 125 | return p 126 | 127 | def all_csv_exist(self, insight, props): 128 | for p in props: 129 | l = os.path.join(self.ldir, self.prefix + "-" + insight + "-" + p + ".csv") 130 | if os.path.isfile(l) == False: 131 | return False 132 | return True 133 | 134 | def build_sc_stat(self): 135 | # count the number of threads 136 | scm_logs = glob.glob(os.path.join(self.ldir, self.prefix + "-scmon.*")) 137 | self.sc_thr_nr = len(scm_logs) 138 | if self.sc_thr_nr == 0: 139 | return 140 | 141 | # if sc_log does not exists, generate it. 
    def build_data_list(self, insight, prop, keys):
        """Load `{prefix}-{insight}-{prop}.csv` and return [(key, value)] for `keys`.

        The first CSV row is skipped as a header; each remaining row is
        expected to be exactly two columns (name, value).
        NOTE(review): a row with a different column count raises
        ValueError during unpacking, and a requested key missing from
        the file raises KeyError -- confirm the insight tools guarantee
        both.
        """
        l = os.path.join(self.ldir, self.prefix +
                         "-" + insight + "-" + prop + ".csv")
        with open(l, "r") as f:
            rd = csv.reader(f)
            # skip the header row
            rd.__next__()
            data= {}
            for s, fps in rd:
                data[s.strip()] = fps.strip()
            # re-emit in the caller-requested key order
            dlist = []
            for k in keys:
                dlist.append( (k, data[k]) )
            global dbg_prt
            if dbg_prt:
                print(dlist)
            return dlist
"gpu_load", keys) 197 | self.ram_util.stat = self.build_data_list("ginsight", "ram_used", keys) 198 | 199 | def build_factorio_stat(self): 200 | flog = os.path.join(self.ldir, self.prefix + ".factorio_out") 201 | if os.path.isfile(flog) == False: 202 | return 203 | 204 | with open(flog, "r") as f: 205 | print_log("Loading %s" % flog) 206 | for line in f: 207 | line = line.strip() 208 | if line.startswith("avg:") == False: 209 | continue 210 | # avg: 76.110 ms, min: 70.599 ms, max: 233.318 ms 211 | dlist = [] 212 | for tok in line.split(","): 213 | k, v = tok.split(":") 214 | v = v[:-3].strip() 215 | dlist.append( (k, v) ) 216 | self.factorio.stat = dlist 217 | break 218 | global dbg_prt 219 | if dbg_prt: 220 | print(self.factoriot.stat) 221 | 222 | def build_schbench_stat(self): 223 | flog = os.path.join(self.ldir, self.prefix + ".schbench_out") 224 | if os.path.isfile(flog) == False: 225 | return 226 | 227 | class STAT: 228 | # state 229 | UNKNOWN = 0 230 | WAKEUP_LAT = 1 231 | REQ_LAT = 2 232 | RPS = 3 233 | 234 | # state transition map 235 | trans = [("Wakeup Latencies percentiles", WAKEUP_LAT), 236 | ("Request Latencies percentiles", REQ_LAT), 237 | ("RPS percentiles", RPS)] 238 | 239 | def __init__(self, sa): 240 | self.stat_app = sa 241 | self.s = STAT.UNKNOWN 242 | self.new_s = STAT.UNKNOWN 243 | self.dlist = [] 244 | 245 | def is_new_state(self, line): 246 | for prefix, new_s in self.trans: 247 | if line.startswith(prefix): 248 | self.new_s = new_s 249 | return True 250 | return False 251 | 252 | def trans_state(self): 253 | # wrarp up the current state 254 | if self.s == self.WAKEUP_LAT: 255 | self.stat_app.wakeup_lat.stat = self.dlist 256 | elif self.s == self.REQ_LAT: 257 | self.stat_app.req_lat.stat = self.dlist 258 | elif self.s == self.RPS: 259 | self.stat_app.rps.stat = self.dlist 260 | # prep for the new state 261 | self.dlist = [] 262 | self.s = self.new_s 263 | self.new_s = self.UNKNOWN 264 | 265 | s = STAT(self) 266 | with open(flog, "r") as f: 
267 | for line in f: 268 | # transitioning to a new state 269 | if s.is_new_state(line): 270 | s.trans_state() 271 | continue 272 | # continue on the old state 273 | # * clean up lines 274 | line = line.strip() 275 | if line.startswith("current rps:") or \ 276 | line.startswith("final rps goal was") or \ 277 | line.startswith("setting worker threads") or \ 278 | line.startswith("#") or line == "": \ 279 | continue 280 | if line.startswith("* "): 281 | line = line[2:].strip() 282 | if line.find("(") != -1: 283 | line = line[: line.find("(")] 284 | line = line.strip() 285 | # * parsing 286 | # - min=7403, max=7900 287 | if line.startswith("min"): 288 | mx_list = line.split(",") 289 | for mx in mx_list: 290 | kv = mx.split("=") 291 | s.dlist.append( (kv[0].strip(), kv[1].strip()) ) 292 | # - 20.0th: 7544 293 | # - average rps: 7582.38 294 | else: 295 | kv = line.split(":") 296 | s.dlist.append( (kv[0].strip(), kv[1].strip()) ) 297 | 298 | def build_proc_stat(self): 299 | # if there is a missing CSV, generate all CSVs 300 | props = ["cstate-sw", "energy-sw", "perf-sw", "sched-sw"] 301 | if self.force == True or self.all_csv_exist("procinsight", props) == False: 302 | arg = "-q -o {outdir} -l {prefix}".format( 303 | outdir = self.ldir, prefix = self.prefix) 304 | self.exec_insight("procinsight", arg) 305 | if self.all_csv_exist("procinsight", props) == False: 306 | return 307 | 308 | # * CPU power state 309 | # - C0, poll, C1, c2, c3 310 | keys = ["C0", "POLL", "C1", "C2", "C3"] 311 | self.cpu_pwr.stat = self.build_data_list("procinsight", "cstate-sw", keys) 312 | # * Clock 313 | # - freq 314 | keys = ["Freq"] 315 | self.clock_freq.stat = self.build_data_list("procinsight", "cstate-sw", keys) 316 | # * Energy 317 | # - J, J/sec 318 | keys= ["J", "J/sec"] 319 | self.energy.stat = self.build_data_list("procinsight", "energy-sw", keys) 320 | # * processor 321 | # - ipc, front-end stall (%), back-end stall (%), page faults 322 | keys = ["ipc", "frontend-stall (%)", 
"backend-stall (%)", "page-faults"] 323 | self.processor.stat = self.build_data_list("procinsight", "perf-sw", keys) 324 | # * scheduling 325 | # - context_switches, cpu-migrations, sched_wakeup 326 | keys = ["context-switches", "cpu-migrations"] 327 | self.sched.stat = self.build_data_list("procinsight", "perf-sw", keys) 328 | keys = ["sched_wakeup"] 329 | self.sched.stat = self.sched.stat + \ 330 | self.build_data_list("procinsight", "sched-sw", keys) 331 | 332 | def build_stat(self): 333 | self.build_sc_stat() 334 | self.build_factorio_stat() 335 | self.build_schbench_stat() 336 | self.build_g_stat() 337 | self.build_proc_stat() 338 | 339 | def calc_diff(self, baseline): 340 | self.factorio.calc_diff(baseline.factorio) 341 | self.wakeup_lat.calc_diff(baseline.wakeup_lat) 342 | self.req_lat.calc_diff(baseline.req_lat) 343 | self.rps.calc_diff(baseline.rps) 344 | self.fps.calc_diff(baseline.fps) 345 | self.cpu_util.calc_diff(baseline.cpu_util) 346 | self.gpu_util.calc_diff(baseline.gpu_util) 347 | self.ram_util.calc_diff(baseline.ram_util) 348 | self.cpu_pwr.calc_diff(baseline.cpu_pwr) 349 | self.clock_freq.calc_diff(baseline.clock_freq) 350 | self.energy.calc_diff(baseline.energy) 351 | self.processor.calc_diff(baseline.processor) 352 | self.sched.calc_diff(baseline.sched) 353 | 354 | def build_app_stats(args): 355 | # buid app stat for each log directory 356 | app_stats = [] 357 | for logdir in args.logdir: 358 | stat = stat_app(logdir, args.prefix, args.force) 359 | stat.build_stat() 360 | app_stats.append(stat) 361 | 362 | # calculate diff in % 363 | baseline = app_stats[0] 364 | for comp in app_stats: 365 | comp.calc_diff(baseline) 366 | 367 | return app_stats 368 | 369 | def get_res_path(ldir, fname): 370 | global out_dir 371 | afil = os.path.join(ldir, fname) 372 | rfil = os.path.relpath(afil, start = out_dir) 373 | return afil, rfil 374 | 375 | def gen_md_tbl(stat_app, suffix, tuples, header, f): 376 | nick = stat_app.nick 377 | afil, rfil = 
get_res_path(stat_app.ldir, stat_app.prefix + suffix) 378 | 379 | # generate header row 380 | if header: 381 | l1, l2 = "| |", "| ---- |" 382 | for t in tuples: 383 | l1 = l1 + " " + t[0] + " | " 384 | l2 = l2 + " ---: | " 385 | print(l1, file = f) 386 | print(l2, file = f) 387 | # generate data 388 | if os.path.isfile(afil): 389 | l = "| [**" + nick + "**](" + rfil + ") |" 390 | else: 391 | l = "| **" + nick + "** |" 392 | for t in tuples: 393 | l = l + " " + t[1] + " | " 394 | print(l, file = f) 395 | 396 | def get_style(d): 397 | if d == 0: 398 | return "" 399 | if d > 0.0: 400 | return "**" 401 | return "*" 402 | 403 | def gen_md_diff_tbl(stat_app, suffix, st_item, baseline, f): 404 | nick = stat_app.nick 405 | afil, rfil = get_res_path(stat_app.ldir, stat_app.prefix + suffix) 406 | 407 | # generate header row for baseline 408 | if baseline: 409 | l1, l2 = "| |", "| ---- |" 410 | for s in st_item.stat: 411 | l1 = l1 + " " + s[0] + " | " 412 | l2 = l2 + " ---: | " 413 | print(l1, file = f) 414 | print(l2, file = f) 415 | # generate data 416 | if os.path.isfile(afil): 417 | l = "| [**" + nick + "**](" + rfil + ") |" 418 | else: 419 | l = "| **" + nick + "** |" 420 | for (s, d) in zip(st_item.stat, st_item.diff): 421 | style = get_style(d) 422 | l = l + " " + style + s[1] 423 | if baseline == False: 424 | l = l + " (%.4f" % d + "%)" 425 | l = l + style + " | " 426 | print(l, file = f) 427 | 428 | def gen_report_config(args, app_stats, f): 429 | # title = prefix 430 | print("``` {=html}", file = f) 431 | print("", file = f) 434 | print("```", file = f) 435 | 436 | print("---", file = f) 437 | print("title: %s" % args.prefix, file = f) 438 | print("date: %s" % datetime.datetime.now(), file = f) 439 | print("---", file = f) 440 | print("\n\n", file = f) 441 | 442 | # #### Comparisons 443 | print("### Comparisons\n", file = f) 444 | for s in app_stats: 445 | print("- **%s**: %s" % (s.nick, s.ldir), file = f) 446 | print("\n\n", file = f) 447 | 448 | def 
gen_report_syscall(args, app_stats, f): 449 | for s in app_stats: 450 | if s.sc_thr_nr == 0: 451 | continue 452 | print("### System call\n", file = f) 453 | print("- Number of threads involved: %d\n" % s.sc_thr_nr, file = f) 454 | print("- Top 10 system calls (%)\n", file = f) 455 | gen_md_tbl(s, "-scinsight-stat.svg", s.sc_top10, True, f) 456 | print("\n\n", file = f) 457 | 458 | def gen_report_factorio(args, app_stats, f): 459 | s = app_stats[0] 460 | if s.factorio.stat == None: 461 | return 462 | print("### Map update time in msec\n", file = f) 463 | gen_md_diff_tbl(s, ".factorio_out", s.factorio, True, f) 464 | for s in app_stats[1:]: 465 | if s.factorio== None: 466 | continue 467 | gen_md_diff_tbl(s, ".factorio_out", s.factorio, False, f) 468 | print("\n\n", file = f) 469 | 470 | def gen_report_schbench_rps(args, app_stats, f): 471 | s = app_stats[0] 472 | if s.rps.stat == None: 473 | return 474 | print("### Request per second (RPS)\n", file = f) 475 | gen_md_diff_tbl(s, ".schbench_out", s.rps, True, f) 476 | for s in app_stats[1:]: 477 | if s.rps == None: 478 | continue 479 | gen_md_diff_tbl(s, ".schbench_out", s.rps, False, f) 480 | print("\n\n", file = f) 481 | 482 | def gen_report_schbench_req_lat(args, app_stats, f): 483 | s = app_stats[0] 484 | if s.req_lat.stat == None: 485 | return 486 | print("### Request latencies (usec)\n", file = f) 487 | gen_md_diff_tbl(s, ".schbench_out", s.req_lat, True, f) 488 | for s in app_stats[1:]: 489 | if s.req_lat == None: 490 | continue 491 | gen_md_diff_tbl(s, ".schbench_out", s.req_lat, False, f) 492 | print("\n\n", file = f) 493 | 494 | def gen_report_schbench_wakeup_lat(args, app_stats, f): 495 | s = app_stats[0] 496 | if s.wakeup_lat.stat == None: 497 | return 498 | print("### Wakeup latencies (usec)\n", file = f) 499 | gen_md_diff_tbl(s, ".schbench_out", s.wakeup_lat, True, f) 500 | for s in app_stats[1:]: 501 | if s.wakeup_lat == None: 502 | continue 503 | gen_md_diff_tbl(s, ".schbench_out", s.wakeup_lat, False, 
f) 504 | print("\n\n", file = f) 505 | 506 | def gen_report_schbench(args, app_stats, f): 507 | gen_report_schbench_rps(args, app_stats, f) 508 | gen_report_schbench_req_lat(args, app_stats, f) 509 | gen_report_schbench_wakeup_lat(args, app_stats, f) 510 | 511 | def gen_report_fps(args, app_stats, f): 512 | s = app_stats[0] 513 | if s.fps.stat == None: 514 | return 515 | print("### FPS\n", file = f) 516 | gen_md_diff_tbl(s, "-ginsight-fps.svg", s.fps, True, f) 517 | for s in app_stats[1:]: 518 | if s.fps == None: 519 | continue 520 | gen_md_diff_tbl(s, "-ginsight-fps.svg", s.fps, False, f) 521 | print("\n\n", file = f) 522 | 523 | 524 | def gen_report_cpu_util(args, app_stats, f): 525 | s = app_stats[0] 526 | if s.cpu_util.stat == None: 527 | return 528 | print("### CPU utilization\n", file = f) 529 | gen_md_diff_tbl(s, "-ginsight-cpu_load.svg", s.cpu_util, True, f) 530 | for s in app_stats[1:]: 531 | if s.cpu_util == None: 532 | continue 533 | gen_md_diff_tbl(s, "-ginsight-cpu_load.svg", s.cpu_util, False, f) 534 | print("\n\n", file = f) 535 | 536 | def gen_report_gpu_util(args, app_stats, f): 537 | s = app_stats[0] 538 | if s.gpu_util.stat == None: 539 | return 540 | print("### GPU utilization\n", file = f) 541 | gen_md_diff_tbl(s, "-ginsight-gpu_load.svg", s.gpu_util, True, f) 542 | for s in app_stats[1:]: 543 | if s.gpu_util == None: 544 | continue 545 | gen_md_diff_tbl(s, "-ginsight-gpu_load.svg", s.gpu_util, False, f) 546 | print("\n\n", file = f) 547 | 548 | def gen_report_ginsight_overview(args, app_stats, f): 549 | # print header 550 | # | | conf1 | conf2 | conf3 | 551 | # | ---- | ----- | ----- | ----- | 552 | print("### Performance overview\n", file = f) 553 | l1, l2 = "| |", "| :----: |" 554 | for s in app_stats: 555 | l1 = l1 + " " + s.nick + " | " 556 | l2 = l2 + " :---: | " 557 | print(l1, file = f) 558 | print(l2, file = f) 559 | 560 | # print graphs 561 | # | FPS | img1 | img2 | img3 | 562 | # | cpu | img1 | img2 | img3 | 563 | # | gpu | img1 | 
img2 | img3 | 564 | # | ram | img1 | img2 | img3 | 565 | class report_conf: 566 | name = "" 567 | suffix = "" 568 | def __init__(self, n, s): 569 | self.name = n 570 | self.suffix = s 571 | 572 | confs = [report_conf("FPS", "-ginsight-fps.svg"), 573 | report_conf("CPU", "-ginsight-cpu_load.svg"), 574 | report_conf("GPU", "-ginsight-gpu_load.svg"), 575 | report_conf("RAM", "-ginsight-ram_used.svg"),] 576 | for c in confs: 577 | l1 = "| " + c.name + " |" 578 | for s in app_stats: 579 | afil, rfil = get_res_path(s.ldir, s.prefix + c.suffix) 580 | img = "![](" + rfil + ")" 581 | l1 = l1 + " " + img + " | " 582 | print(l1, file = f) 583 | print("\n\n", file = f) 584 | 585 | def gen_report_ram_util(args, app_stats, f): 586 | s = app_stats[0] 587 | if s.ram_util.stat == None: 588 | return 589 | print("### RAM usage\n", file = f) 590 | gen_md_diff_tbl(s, "-ginsight-ram_used.svg", s.ram_util, True, f) 591 | for s in app_stats[1:]: 592 | if s.ram_util == None: 593 | continue 594 | gen_md_diff_tbl(s, "-ginsight-ram_used.svg", s.ram_util, False, f) 595 | print("\n\n", file = f) 596 | 597 | def gen_report_cpu_pwr(args, app_stats, f): 598 | s = app_stats[0] 599 | if s.cpu_pwr.stat == None: 600 | return 601 | print("### CPU power state\n", file = f) 602 | gen_md_diff_tbl(s, "-procinsight-cstate-core.csv", s.cpu_pwr, True, f) 603 | for s in app_stats[1:]: 604 | if s.cpu_pwr == None: 605 | continue 606 | gen_md_diff_tbl(s, "-procinsight-cstate-core.csv", s.cpu_pwr, False, f) 607 | print("\n\n", file = f) 608 | 609 | def gen_report_clock_freq(args, app_stats, f): 610 | s = app_stats[0] 611 | if s.clock_freq.stat == None: 612 | return 613 | print("### Clock frequency\n", file = f) 614 | gen_md_diff_tbl(s, "-procinsight-cstate-core.csv", s.clock_freq, True, f) 615 | for s in app_stats[1:]: 616 | if s.clock_freq == None: 617 | continue 618 | gen_md_diff_tbl(s, "-procinsight-cstate-core.csv", s.clock_freq, False, f) 619 | print("\n\n", file = f) 620 | 621 | def gen_report_energy(args, 
app_stats, f): 622 | s = app_stats[0] 623 | if s.energy.stat == None: 624 | return 625 | print("### Energy consumption \n", file = f) 626 | gen_md_diff_tbl(s, "-procmon-energy__.log", s.energy, True, f) 627 | for s in app_stats[1:]: 628 | if s.energy == None: 629 | continue 630 | gen_md_diff_tbl(s, "-procmon-energy__.log", s.energy, False, f) 631 | print("\n\n", file = f) 632 | 633 | def gen_report_processor(args, app_stats, f): 634 | s = app_stats[0] 635 | if s.processor.stat == None: 636 | return 637 | print("### Processor state\n", file = f) 638 | gen_md_diff_tbl(s, "-procinsight-perf-sw.csv", s.processor, True, f) 639 | for s in app_stats[1:]: 640 | if s.processor == None: 641 | continue 642 | gen_md_diff_tbl(s, "-procinsight-perf-sw.csv", s.processor, False, f) 643 | print("\n\n", file = f) 644 | 645 | def gen_report_sched(args, app_stats, f): 646 | s = app_stats[0] 647 | if s.sched.stat == None: 648 | return 649 | print("### Scheduling state\n", file = f) 650 | gen_md_diff_tbl(s, "-procinsight-sched-core.csv", s.sched, True, f) 651 | for s in app_stats[1:]: 652 | if s.sched == None: 653 | continue 654 | gen_md_diff_tbl(s, "-procinsight-sched-core.csv", s.sched, False, f) 655 | print("\n\n", file = f) 656 | 657 | def gen_report(args, app_stats): 658 | # generate a report in markdown format 659 | file_md = args.output + ".md" 660 | with open(file_md, "w") as f: 661 | gen_report_config(args, app_stats, f) 662 | gen_report_syscall(args, app_stats, f) 663 | gen_report_factorio(args, app_stats, f) 664 | gen_report_schbench(args, app_stats, f) 665 | gen_report_ginsight_overview(args, app_stats, f) 666 | gen_report_fps(args, app_stats, f) 667 | gen_report_sched(args, app_stats, f) 668 | gen_report_cpu_util(args, app_stats, f) 669 | gen_report_gpu_util(args, app_stats, f) 670 | gen_report_ram_util(args, app_stats, f) 671 | gen_report_cpu_pwr(args, app_stats, f) 672 | gen_report_clock_freq(args, app_stats, f) 673 | gen_report_energy(args, app_stats, f) 674 | 
gen_report_processor(args, app_stats, f) 675 | 676 | # convert the markdown report to html 677 | file_html = args.output + ".html" 678 | global cur_dir 679 | cmd = "pandoc --standalone --toc %s -o %s" % (file_md, file_html) 680 | print_log("Running %s" % cmd) 681 | p = subprocess.Popen(cmd, shell=True, stdout=None, stderr=None) 682 | p.wait() 683 | 684 | def get_cmd_options(argv): 685 | parser = argparse.ArgumentParser( 686 | prog = "report", 687 | description = "Generate a report of given log directories", 688 | epilog = """ 689 | For example, `report -l base_dir -l cmp_dir -p game1 -o report.md` compares `game1` logs in two directoreis -- `base_dir` and `cmp_dir` -- and generates `report.md`. `base_dir` is used in calculating the relative difference. When only one log directory is given, only the summary of results without comparison is provided. It expects certain file extensions: `*.factorio_out` for factorio benchmark and `*.schbench_out` for schbench benchmark. 690 | """) 691 | 692 | parser.add_argument('-l', '--logdir', action='append', required=True, 693 | help='a log directory. 
When mulltiple `-l` options ' \ 694 | 'are given, comparison will be reported using ' \ 695 | 'the first one as a baseline.') 696 | parser.add_argument('-p', '--prefix', action='store', required=True, 697 | help='log file prefix for report generation') 698 | parser.add_argument('-o', '--output', action='store', required=True, 699 | help='target report file name in markdown format') 700 | parser.add_argument('-f', '--force', action='store_true', 701 | help='force to regenerate all CSV files') 702 | parser.add_argument('-g', '--debug', action='store_true', 703 | help='print out debug messages') 704 | args = parser.parse_args(argv) 705 | global dbg_prt 706 | dbg_prt = args.debug 707 | global out_dir 708 | out_dir = os.path.dirname(args.output) 709 | return args 710 | 711 | if __name__ == "__main__": 712 | args = get_cmd_options(sys.argv[1:]) 713 | app_stats = build_app_stats(args) 714 | gen_report(args, app_stats) 715 | 716 | --------------------------------------------------------------------------------