├── LICENSE ├── README.md ├── plot-latency.py ├── pmucaps └── schedtime /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # performance-tools 2 | A collection of performance-related tools and scripts 3 | 4 | ## schedtime - A better time(1)-like tool 5 | 6 | schedtime executes a program and prints the run time statistics. It 7 | works like the time(1) command, only instead of printing high-level 8 | details such as system and user time schedtime tells you whether the 9 | program was waiting on userspace (sleep(), wait()), waiting in the 10 | kernel (preemption, locking), or blocked on i/o (disk and network 11 | accesses). 12 | 13 | ## pmucaps - Display Performance Monitoring Unit capabilities 14 | 15 | PMU features vary from system to system, particularly in the cloud where 16 | it's common for the absolute bare minimum of hardware-events to be 17 | available. 
**pmucaps** prints which PMU capabilities are available on the 18 | current system and includes details such as microarchitecture, Last 19 | Branch Record support, and whether precise IPs can be reported with 20 | PEBS. 21 | -------------------------------------------------------------------------------- /plot-latency.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Matt Fleming 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import sys
import seaborn as sns
import argparse

# Command line: presentation switches first, then the input file and one
# or more pids to chart.
parser = argparse.ArgumentParser(description='Print scheduler stats')
parser.add_argument("--cdf", action='store_true',
                    help="Plot the latency as a cummulative distribution frequency")
parser.add_argument("--title", metavar='t', type=str, nargs=1,
                    help="Set the graph title")
# The remaining options differ only by name, value type and help text.
# Every one of them uses nargs=1, so its parsed value is a one-element list.
for flag, kind, text in (
        ("--subtitle", str, "Set the subplot title"),
        ("--xlim", int, "Set the x-axis limits"),
        ("--ylim", int, "Set the y-axis limits"),
        ("--output", str, "Save the figure to output"),
):
    parser.add_argument(flag, type=kind, nargs=1, help=text)
parser.add_argument("file", metavar='f', type=str, nargs=1,
                    help="read this perf.data file")
parser.add_argument("pids", metavar='p', type=int, nargs='+',
                    help="create a chart for this pid")

args = parser.parse_args()

# The input is read as CSV: the first column becomes a date-parsed index,
# and the "pid" and "delay" columns are the ones used below.
data = pd.read_csv(args.file[0], index_col=0, parse_dates=True)

# Per-pid "delay" series, with a statistical summary printed for each.
pids = args.pids
pid_data = {}
for p in pids:
    print(p)
    delays = data.loc[data["pid"] == p, "delay"]
    pid_data[p] = delays
    print(delays.describe())

# Exactly four pids get a 2x2 grid; any other count is stacked in a
# single column, one row per pid.
grid_shape = (2, 2) if len(pids) == 4 else (len(pids), 1)
fig, ax = plt.subplots(*grid_shape)

# make a little extra space between the subplots
fig.subplots_adjust(hspace=0.5)
# Set y-ticks for known values we want to see 70 | #client_ax.set_yticks([0, 4, max(client)]) 71 | 72 | #server_ax = data[data["pid"] == server_pid].plot(marker='.', 73 | # linestyle='None', ax=ax[1]) 74 | #server_ax.set_yticks([0, 4, max(server)]) 75 | 76 | if args.title: 77 | fig.suptitle(args.title[0]) 78 | 79 | def plot_cdf(p, a): 80 | #ser = pd.Series(pid_data[p]) 81 | stats_df = data.loc[data["pid"] == p].groupby('delay')['delay'] \ 82 | .agg('count').pipe(pd.DataFrame) \ 83 | .rename(columns = {'delay': 'frequency'}) 84 | # PDF 85 | stats_df['pdf'] = stats_df['frequency'] / sum(stats_df['frequency']) 86 | # CDF 87 | stats_df['cdf'] = stats_df['pdf'].cumsum() 88 | stats_df = stats_df.reset_index() 89 | # Plot 90 | p_ax = stats_df.plot(x = 'delay', y = 'cdf', legend=None, ax=a) 91 | p_ax.set_xlabel("latency (µs)") 92 | #p_ax.yaxis.set_major_formatter(mtick.PercentFormatter()) 93 | # manipulate 94 | vals = p_ax.get_yticks() 95 | p_ax.set_yticklabels(['{:,.2%}'.format(x) for x in vals]) 96 | 97 | if args.xlim: 98 | p_ax.set_xlim(0, args.xlim[0]) 99 | if args.ylim: 100 | p_ax.set_ylim(0, args.ylim[0]) 101 | 102 | 103 | def plot_plot(a): 104 | p_ax = pid_data[p].plot(marker='.', 105 | linestyle='None', ax=a, legend=None) 106 | #p_ax.set_yticks(0, max(pid_data[p])) 107 | p_ax.set_ylabel("latency (µs)") 108 | p_ax.set_xlabel("Time") 109 | if args.subtitle: 110 | p_ax.set_title(args.subtitle[0]) 111 | else: 112 | p_ax.set_title("pid=" + str(p)) 113 | if args.xlim: 114 | p_ax.set_xlim(0, args.xlim[0]) 115 | if args.ylim: 116 | p_ax.set_ylim(0, args.ylim[0]) 117 | 118 | col = 0 119 | row = 0 120 | 121 | def do_plot(p, a): 122 | if args.cdf: 123 | plot_cdf(p, a) 124 | else: 125 | plot_plot(a) 126 | 127 | for p in pids: 128 | if len(pids) == 4: 129 | a = ax[row][col] 130 | col = (col+1)%2 131 | # New row? 
132 | if col == 0: 133 | row += 1 134 | elif len(pids) > 1: 135 | a = ax[row] 136 | row += 1 137 | else: 138 | a = ax 139 | row += 1 140 | 141 | do_plot(p, a) 142 | 143 | if args.output: 144 | fig.savefig(args.output[0]) 145 | 146 | plt.show() 147 | -------------------------------------------------------------------------------- /pmucaps: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Matt Fleming 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 

# Every capability reported below is read from this sysfs directory.
SYSFS_DIR=/sys/devices/cpu

echo "Linux kernel version: $(uname -r)"
if [ -e "${SYSFS_DIR}/caps/pmu_name" ]; then
	pmu_name=$(cat "${SYSFS_DIR}/caps/pmu_name")
	echo "Microarchitecture: $pmu_name"
else
	# No pmu_name file: report a generic PMU and show what the kernel log
	# says about performance monitoring instead.
	echo "Microarchitecture: generic"
	sudo dmesg | grep Performance
fi


# Plain 'if' blocks are used below instead of '[ -e ... ] && { ... }' so
# that a missing file does not become the exit status of the script.
if [ -e "${SYSFS_DIR}/caps/branches" ]; then
	num_entries=$(cat "${SYSFS_DIR}/caps/branches")
	echo "Last Branch Record (LBR) events available: $num_entries entries"
fi

if [ -e "${SYSFS_DIR}/caps/max_precise" ]; then
	precision=$(cat "${SYSFS_DIR}/caps/max_precise")
	case "$precision" in
	0)
		str="sample IP can have arbitrary skid"
		;;
	1)
		str="sample IP must have constant skid"
		;;
	2)
		str="sample IP requested to have 0 skid"
		;;
	3)
		str="sample IP must have 0 skid"
		;;
	*)
		str="Unknown max_precise value"
		;;
	esac

	echo "Maximum PEBS precision available: $precision ($str)"
fi

if [ -e "${SYSFS_DIR}/events" ]; then
	echo "PEBS events available:"
	for e in "${SYSFS_DIR}"/events/*; do
		# An unmatched glob is left as the literal pattern; skip it.
		[ -e "$e" ] || continue
		event=${e##*/}
		mask=$(cat "$e")

		# Pass the values as arguments so that '%' or '\' in them is
		# printed literally rather than interpreted as a format.
		printf '\t%s => %s\n' "$event" "$mask"
	done
fi

--------------------------------------------------------------------------------
/schedtime:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
#
# Copyright 2020 Matt Fleming
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# schedtime - Display a task run time statistics using scheduler hooks.
#
# Unlike bash's time built-in schedtime includes off-cpu time in its results.
#

from __future__ import print_function
from bcc import BPF
import argparse
import os
import signal
import sys
import subprocess
import time

# BPF program, compiled by bcc at start-up.  It exposes three probes:
#
#   wake_up_probe - records a (current task -> new task) edge in `pids`
#                   and the new task's start timestamp in `task_lifetime`.
#   sched_in      - stamps the task being switched out in `start`, and adds
#                   the time since the current task's own stamp (in
#                   microseconds) to `events`, keyed by pid, tgid and
#                   kernel stack id.
#   dead_task     - turns the stored start timestamp into a lifetime in
#                   microseconds and flags the entry as dead.
#
# NOTE(review): the two header names below were missing from the text this
# was recovered from.  They are the headers that declare struct pt_regs,
# struct task_struct, TASK_COMM_LEN and PF_KTHREAD - confirm against the
# upstream file.
source="""
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct data_item {
    u32 pid;
    u32 tgid;
    int kernel_stack_id;
};

struct pid_item {
    u32 parent;
    u32 child;
};

struct task_lifetime {
    u32 pid;
    u64 delta;
    u32 dead;
    char comm[TASK_COMM_LEN];
};

BPF_HASH(events, struct data_item);
BPF_HASH(start, u32);
BPF_STACK_TRACE(stack_traces, 40960);
BPF_HASH(pids, struct pid_item);
BPF_HASH(task_lifetime, u32, struct task_lifetime);

int wake_up_probe(struct pt_regs *ctx, struct task_struct *p)
{
    struct pid_item pid = {};
    struct task_lifetime tl = {};
    u64 one = 1;
    u64 now;
    u32 child = p->pid;

    // Don't trace kernel threads
    if (p->flags & PF_KTHREAD)
        return 0;

    pid.parent = bpf_get_current_pid_tgid();
    pid.child = child;

    pids.update(&pid, &one);

    now = bpf_ktime_get_ns();
    tl.pid = child;
    tl.delta = now;
    task_lifetime.update(&child, &tl);

    return 0;
}

int sched_in(struct pt_regs *ctx, struct task_struct *prev) {
    struct data_item data = {};
    u32 pid = prev->pid;
    u32 ppid;
    u32 tgid;
    u64 ts, *tsp;
    u64 delta;

    // We may never see 'prev' again if it's a dying task. Update its stats.
    ts = bpf_ktime_get_ns();
    start.update(&pid, &ts);

    // get the current thread's start time
    pid = bpf_get_current_pid_tgid();
    tgid = bpf_get_current_pid_tgid() >> 32;
    tsp = start.lookup(&pid);
    if (tsp == 0)
        return 0;

    // calculate current thread's delta time
    delta = bpf_ktime_get_ns() - *tsp;
    start.delete(&pid);
    delta = delta / 1000;

    data.pid = pid;
    data.tgid = tgid;
    data.kernel_stack_id = stack_traces.get_stackid(ctx, 0);

    events.increment(data, delta);

    return 0;
}

int dead_task(struct pt_regs *ctx, struct task_struct *p)
{
    u64 *begin, delta;
    u64 now = bpf_ktime_get_ns();
    u32 pid = p->pid;
    struct task_lifetime *tl;

    tl = task_lifetime.lookup(&pid);
    if (!tl)
        return 0;

    delta = now - tl->delta;
    if ((s64)delta <= 0)
        delta = 1000;
    tl->delta = delta / 1000;
    tl->dead = 1;
    bpf_get_current_comm(&tl->comm, sizeof(tl->comm));
    task_lifetime.update(&pid, tl);
    return 0;
}
"""

# Destination of the report; the main script rebinds it when -o is given.
fh = sys.stdout


def eprint(*values, **kwargs):
    """print() to the report destination `fh` instead of stdout."""
    print(*values, file=fh, **kwargs)


# Parsed command-line options; assigned by the main script.
args = None


def time_unit(us_val):
    """Convert microseconds to milliseconds (with -m) or to seconds."""
    return us_val / 1000.0 if args.milliseconds else us_val / 1000.0 / 1000.0


def pct(x, total):
    """Return *x* as a percentage of *total*.

    The result is 0.0 when either value is zero, so an empty total never
    causes a division by zero.
    """
    if x == 0 or total == 0:
        return 0.0
    return x / total * 100.0


# Breadth-first search
#
# Find all vertices reachable from source vertex s.
def BFS(s, edges):
    """Return every vertex reachable from *s*, including *s* itself.

    *edges* is an iterable of ``(key, value)`` pairs.  Only ``key.parent``
    and ``key.child`` are read, each pair describing a directed edge from
    parent to child.  The iterable is consumed once, so a one-shot iterator
    is accepted.  Vertices are returned in the order they were visited.
    """
    from collections import deque

    # Build adjacency lists
    adj = {}
    for k, _ in edges:
        adj.setdefault(k.parent, []).append(k.child)

    seen = {s}
    visited = []
    q = deque([s])  # FIFO queue
    while q:
        u = q.popleft()
        visited.append(u)
        for v in adj.get(u, ()):
            if v not in seen:
                seen.add(v)
                q.append(v)

    return visited


class Task(object):
    """A Linux task (thread)"""

    def __init__(self, pid):
        # Lifetime and every entry of `times` share one unit; time_unit()
        # treats them as microseconds.
        self.lifetime = 0
        self.pid = pid
        self.comm = ""
        # Off-cpu time, bucketed by the reason the task was scheduled out.
        self.times = {
            "disk io": 0,
            "voluntary wait": 0,
            "involuntary preempt": 0,
            "userspace locking": 0,
            "kernel locking": 0,
            "network io": 0,
            "page faults": 0
        }

    def total_time(self):
        """Return the on-cpu time: lifetime minus all off-cpu buckets."""
        return self.lifetime - sum(self.times.values())

    def __str__(self):
        """Render the per-task report block printed by -v."""
        unit = "ms" if args.milliseconds else "s"
        oncpu = {"abs": self.total_time(), "pct": pct(self.total_time(), self.lifetime)}

        # NOTE(review): the literals are reproduced exactly as recovered;
        # any column padding that once followed the labels may have been
        # collapsed - confirm against the upstream file.
        return " [{27:s}-{0:d}] lifetime: {1:.2f}{2:s}\n" \
               "\n" \
               " on-cpu: {3: >6.2f}{4:s} ({5:>4.1f}%)\n" \
               "\n" \
               " userspace locking: {6: >6.2f}{7:s} ({8:>4.1f}%)\n" \
               " userspace wait: {9: >6.2f}{10:s} ({11:>4.1f}%)\n"\
               "\n" \
               " kernel locking: {12: >6.2f}{13:s} ({14:>4.1f}%)\n" \
               " kernel wait: {15: >6.2f}{16:s} ({17:>4.1f}%)\n" \
               "\n" \
               " network i/o: {18: >6.2f}{19:s} ({20:>4.1f}%)\n" \
               " disk i/o: {21: >6.2f}{22:s} ({23:>4.1f}%)\n" \
               "\n" \
               " page faults: {24: >6.2f}{25:s} ({26:>4.1f}%)\n" \
               .format(
                   self.pid, time_unit(self.lifetime), unit, time_unit(oncpu["abs"]), unit, oncpu["pct"],
                   time_unit(self.times["userspace locking"]), unit, self.pct("userspace locking"),
                   time_unit(self.times["voluntary wait"]), unit, self.pct("voluntary wait"),
                   time_unit(self.times["kernel locking"]), unit, self.pct("kernel locking"),
                   time_unit(self.times["involuntary preempt"]), unit, self.pct("involuntary preempt"),
                   time_unit(self.times["network io"]), unit, self.pct("network io"),
                   time_unit(self.times["disk io"]), unit, self.pct("disk io"),
                   time_unit(self.times["page faults"]), unit, self.pct("page faults"),
                   self.comm,
               )

    def pct(self, group):
        """Return the share of the lifetime spent in bucket *group*, in percent."""
        return pct(self.times[group], self.lifetime)


# -s keyword -> (sort key function, descending?)
SORT_KEYS = {
    "lifetime": (lambda t: t.lifetime, True),
    "command": (lambda t: t.comm, False),
    "tid": (lambda t: t.pid, False),
    "cpu": (lambda t: t.total_time(), True),
    "disk": (lambda t: t.times["disk io"], True),
    "faults": (lambda t: t.times["page faults"], True),
    "net": (lambda t: t.times["network io"], True),
    "klock": (lambda t: t.times["kernel locking"], True),
    "kwait": (lambda t: t.times["involuntary preempt"], True),
    "ulock": (lambda t: t.times["userspace locking"], True),
    "uwait": (lambda t: t.times["voluntary wait"], True),
}


def sort_task_data(tasks, args):
    """Return the Task objects in *tasks* ordered according to ``args.sort``.

    A missing sort key means "lifetime".  An unknown key prints
    "Invalid sort key" and yields an empty list.
    """
    entry = SORT_KEYS.get(args.sort or "lifetime")
    if entry is None:
        print("Invalid sort key")
        return []

    key, descending = entry
    return sorted(tasks.values(), key=key, reverse=descending)


# Kernel function seen on a blocked task's stack -> Task.times bucket.
# Entries are tried in this order and the first one found on the stack
# wins.  Names are given without compiler-generated suffixes (".isra.N"
# and the like); classify_stack() strips those before matching.
FUNC_MAPS = {
    "io_schedule": "disk io",
    "io_schedule_timeout": "disk io",
    "wait_transaction_locked": "disk io",
    "pipe_read": "disk io",
    "pipe_write": "disk io",
    "jbd2_log_wait_commit": "disk io",
    "do_epoll_wait": "disk io",
    "ep_poll": "disk io",
    "wait_woken": "disk io",

    "do_wait": "voluntary wait",
    "do_nanosleep": "voluntary wait",
    "sigsuspend": "voluntary wait",
    "do_sched_yield": "voluntary wait",

    "_cond_resched": "involuntary preempt",
    "__cond_resched": "involuntary preempt",
    "__cond_resched_lock": "involuntary preempt",
    "d_alloc_parallel": "involuntary preempt",
    # This catches vfork(). Not sure this is in the correct bucket
    "wait_for_completion_killable": "involuntary preempt",
    "msleep": "involuntary preempt",
    "wait_for_completion": "involuntary preempt",
    "expand_files": "involuntary preempt",
    "sysvec_reschedule_ipi": "involuntary preempt",

    "futex_wait_queue_me": "userspace locking",

    "rwsem_down_write_slowpath": "kernel locking",
    "rwsem_down_read_slowpath": "kernel locking",
    "__mutex_lock": "kernel locking",

    "__skb_wait_for_more_packets": "network io",
    "unix_stream_read_generic": "network io",
    # This is questionable
    "do_select": "network io",

    "page_fault": "page faults",
    "exc_page_fault": "page faults",
}

# Some stacks don't have enough information to analyse; any stack that
# contains one of these functions is ignored.
SHORT_CIRCUITS = ("exit_to_user_mode_loop", "ret_from_fork")

# Returned by classify_stack() for a stack that matches no known function.
UNKNOWN = "unknown"


def classify_stack(kstack):
    """Map a kernel stack (list of symbol names) to a Task.times bucket.

    Returns the bucket name, ``None`` when the stack should be ignored
    (fewer than three frames, or it contains a SHORT_CIRCUITS function),
    or ``UNKNOWN`` when no FUNC_MAPS entry is on the stack.
    """
    # The call stack should look like this:
    #     finish_task_switch()
    #     schedule()
    #     [exit_to_usermode_loop | io_schedule | do_wait]
    if len(kstack) < 3:
        return None

    # Match on the symbol as reported and on the part before the first
    # '.', so "__mutex_lock.isra.9" is recognised as "__mutex_lock".
    names = set(kstack)
    names.update(name.split(".", 1)[0] for name in kstack)

    if any(func in names for func in SHORT_CIRCUITS):
        return None

    for func, group in FUNC_MAPS.items():
        if func in names:
            return group
    return UNKNOWN


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Summarise scheduler time statistics for tasks",
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-v", "--verbose", action="store_true",
        help="display verbose time statistics for each task individually")
    parser.add_argument("-m", "--milliseconds", action="store_true",
        help="display time in milliseconds")
    parser.add_argument("-o", "--output",
        help="write output to file instead of stdout")
    # `choices` rejects a bad key before the command is run and traced.
    parser.add_argument("-s", "--sort", choices=list(SORT_KEYS), metavar="SORT",
        help="sort the output of -v by one of: \n\n"
        " lifetime - task lifetime (default),\n"
        " command - the task command name,\n"
        " tid - the task thread id,\n"
        " cpu - on-cpu time,\n"
        " disk - disk i/o time,\n"
        " faults - page fault time,\n"
        " net - network i/o time,\n"
        " klock - kernel locking time,\n"
        " kwait - kernel wait time,\n"
        " ulock - userspace locking time,\n"
        " uwait - userspace wait time\n\n"
        " e.g. '-s cpu' sorts output by on-cpu time")
    parser.add_argument("command", help="comamand to execute")
    parser.add_argument("args", nargs=argparse.REMAINDER)
    args = parser.parse_args()

    if args.sort and not args.verbose:
        print("Error: -s requires -v")
        sys.exit(1)

    if args.output:
        fh = open(args.output, 'w')

    try:
        b = BPF(text=source)

        # The compiler may emit the function under an ".isra.N" name; N can
        # have more than one digit.
        b.attach_kprobe(event_re=r'^finish_task_switch$|^finish_task_switch\.isra\.\d+$', fn_name="sched_in")
        b.attach_kprobe(event="wake_up_new_task", fn_name="wake_up_probe")
        b.attach_kprobe(event="exit_thread", fn_name="dead_task")

        started = time.monotonic()
        proc = subprocess.Popen([args.command] + args.args)
        try:
            proc.wait()
        except KeyboardInterrupt:
            # Carry on so that the statistics gathered so far are still
            # reported; the wait below reaps the child.
            pass
        finally:
            proc.wait()
        elapsed_us = (time.monotonic() - started) * 1000.0 * 1000.0

        # Every task descended from the command we started.
        tasks = {tid: Task(tid)
                 for tid in BFS(proc.pid, b.get_table("pids").items())}

        # Unfortunately some tasks may not be gone through do_exit() by the
        # time we try to read their lifetimes.
        wall_time = 0.0
        for k, v in b.get_table("task_lifetime").items():
            if k.value in tasks and v.dead:
                t = tasks[k.value]
                t.lifetime = v.delta
                t.comm = v.comm.decode(errors="replace")
                if k.value == proc.pid:
                    wall_time = v.delta

        # The exit of the command itself was not captured: fall back to the
        # wall-clock time measured around it above.
        if not wall_time:
            wall_time = elapsed_us

        # XXX This shouldn't be needed. How do we even end up with tasks in
        # the BFS tree if we haven't recorded their lifetime?
        tasks = {tid: t for tid, t in tasks.items() if t.lifetime != 0}

        events = b.get_table("events")
        stack_traces = b.get_table("stack_traces")
        for k, v in sorted(events.items(), key=lambda counts:
                           counts[1].value):
            pid = k.pid
            # A negative stack id means no stack was captured.
            if pid not in tasks or k.kernel_stack_id < 0:
                continue

            kstack = [b.ksym(addr).decode("utf-8")
                      for addr in stack_traces.walk(k.kernel_stack_id)]

            group = classify_stack(kstack)
            if group is None:
                continue

            if group != UNKNOWN:
                tasks[pid].times[group] += v.value
                continue

            print("Unknown schedule reason")
            for name in kstack:
                print(" %s" % name)

        total = {}
        unit = "ms" if args.milliseconds else "s"
        total["abs"] = sum([tasks[t].lifetime for t in tasks])

        def calc_time(groups):
            """Sum the given buckets over all tasks."""
            return sum([tasks[t].times[g] for t in tasks for g in groups])

        io = {}
        io["abs"] = calc_time(["disk io", "network io"])
        io["pct"] = pct(io["abs"], total["abs"])

        uwait = {}
        uwait["abs"] = calc_time(["voluntary wait", "userspace locking"])
        uwait["pct"] = pct(uwait["abs"], total["abs"])

        kwait = {}
        kwait["abs"] = calc_time(["involuntary preempt", "kernel locking", "page faults"])
        kwait["pct"] = pct(kwait["abs"], total["abs"])

        # Whatever is not accounted for as waiting counts as on-cpu.
        oncpu = {}
        oncpu["abs"] = total["abs"] - io["abs"] - uwait["abs"] - kwait["abs"]
        oncpu["pct"] = pct(oncpu["abs"], total["abs"])

        eprint("total: %.2f%s, wall: %.2f%s, on-cpu: %.2f%s (%.1f%%), user wait: %.2f%s (%.1f%%), kernel wait: %.2f%s (%.1f%%), i/o: %.2f%s (%.1f%%)" %
               (time_unit(total["abs"]), unit, time_unit(wall_time), unit,
                time_unit(oncpu["abs"]), unit, oncpu["pct"],
                time_unit(uwait["abs"]), unit, uwait["pct"],
                time_unit(kwait["abs"]), unit, kwait["pct"],
                time_unit(io["abs"]), unit, io["pct"]))

        if args.verbose:
            eprint("\ntasks (%d):" % (len(tasks)))
            for task in sort_task_data(tasks, args):
                eprint(task)
    finally:
        # Close the -o file on every exit path, including errors.
        if fh is not sys.stdout:
            fh.close()