#!/bin/bash
# @Function
# Find out the highest cpu consumed threads of java processes, and print the stack of these threads.
#
# @Usage
#   $ ./show-busy-java-threads
#
# @online-doc https://github.com/oldratlee/useful-scripts/blob/master/docs/java.md#-show-busy-java-threads
# @author Jerry Lee (oldratlee at gmail dot com)
# @author superhj1987 (superhj1987 at 126 dot com)

# Script name without directory part; pure parameter expansion, safe for paths with spaces.
readonly PROG="${0##*/}"
# Full original command line, kept for the head info and the "rerun with sudo" hint.
readonly -a COMMAND_LINE=("$0" "$@")
# Get current user name via whoami command
# See https://www.lifewire.com/current-linux-user-whoami-command-3867579
# Because if run command by `sudo -u`, env var $USER is not rewritten/correct, just inherited from outside!
readonly USER="$(whoami)"

################################################################################
# util functions
################################################################################

# NOTE: $'foo' is the escape sequence syntax of bash
readonly ec=$'\033'      # escape char
readonly eend=$'\033[0m' # escape end

# Print a message, colored when stdout is a terminal.
# Arguments: $1 - ANSI color code(s), e.g. 31 or "0;34;42"; $2.. - message words
# Outputs:   one line on stdout
colorEcho() {
  local color=$1
  shift

  # if stdout is console, turn on color output.
  if [[ -t 1 ]]; then
    printf '%s\n' "${ec}[1;${color}m$*${eend}"
  else
    printf '%s\n' "$*"
  fi
}

# Append a message to the append file (-a) and to the run log in the store dir (-S).
# Globals:   append_file, store_dir, store_file_prefix, PROG (all read)
# Arguments: message words
# Returns:   always 0, so it is safe as the last command of a caller
__appendToLogFiles() {
  if [[ -n "$append_file" && -w "$append_file" ]]; then
    printf '%s\n' "$*" >> "$append_file"
  fi
  # store_file_prefix is assigned only after option checking is done; without this
  # guard an early die() would write into a file named "$PROG" in the current directory.
  if [[ -n "$store_dir" && -w "$store_dir" && -n "$store_file_prefix" ]]; then
    printf '%s\n' "$*" >> "${store_file_prefix}$PROG"
  fi
  return 0
}

# Print a colored message to stdout and record the plain text in the log files.
# Arguments: $1 - ANSI color code(s); $2.. - message words
colorPrint() {
  local color=$1
  shift

  colorEcho "$color" "$@"
  __appendToLogFiles "$@"
}

# Print an uncolored message to stdout and record it in the log files.
normalPrint() {
  printf '%s\n' "$*"
  __appendToLogFiles "$@"
}

redPrint() {
  colorPrint 31 "$@"
}

greenPrint() {
  colorPrint 32 "$@"
}

yellowPrint() {
  colorPrint 33 "$@"
}

bluePrint() {
  colorPrint 36 "$@"
}

# Print an error message to stderr and terminate the script with status 1.
die() {
  redPrint "Error: $*" 1>&2
  exit 1
}

# Print the command line followed by a blank line, then run the command.
# Returns: exit status of the command
logAndRun() {
  printf '%s\n' "$*"
  echo
  "$@"
}

# Print the given command line followed by a blank line, then copy stdin to stdout.
# Used to prefix captured command output with the command that produced it.
logAndCat() {
  printf '%s\n' "$*"
  echo
  cat
}

# Print the help text and exit.
# Arguments: $1 - exit code (default 0); non-zero sends the output to stderr
#            $2.. - optional message printed before the help text
usage() {
  local -r exit_code="${1:-0}"
  (( $# > 0 )) && shift

  # duplicate the target fd instead of re-opening /dev/stdout or /dev/stderr,
  # so output redirected to a file is not truncated
  local out_fd=1
  [[ "$exit_code" != 0 ]] && out_fd=2

  # NOTE(review): the first lines of this help text (Usage/Example/section title)
  # and the <...> argument placeholders were rebuilt from the option table below;
  # confirm the wording against the upstream script.
  {
    (( $# > 0 )) && printf '%s\n\n' "$*"
    cat <<EOF
Usage: ${PROG} [OPTION]... [delay [count]]
Find out the highest cpu consumed threads of java processes,
and print the stack of these threads.
Example:
  ${PROG}       # show busy java threads info
  ${PROG} 1     # update every 1 second, (stop by eg: CTRL+C)
  ${PROG} 3 10  # update every 3 seconds, update 10 times
Output control:
  -p, --pid <java pid>      find out the highest cpu consumed threads from
                            the specified java process.
                            default from all java process.
  -c, --count <num>         set the thread count to show, default is 5.
  -a, --append-file <file>  specifies the file to append output as log.
  -S, --store-dir <dir>     specifies the directory for storing
                            the intermediate files, and keep files.
                            default store intermediate files at tmp dir,
                            and auto remove after run. use this option to keep
                            files so as to review jstack/top/ps output later.
  delay                     the delay between updates in seconds.
  count                     the number of updates.
                            delay/count arguments imitates the style of
                            vmstat command.
jstack control:
  -s, --jstack-path <path>  specifies the path of jstack command.
  -F, --force               set jstack to force a thread dump. use when jstack
                            does not respond (process is hung).
  -m, --mix-native-frames   set jstack to print both java and native frames
                            (mixed mode).
  -l, --lock-info           set jstack with long listing.
                            prints additional information about locks.
CPU usage calculation control:
  -d, --top-delay <delay>   specifies the delay between top samples.
                            default is 0.5 (second). get thread cpu percentage
                            during this delay interval.
                            more info see top -d option. eg: -d 1 (1 second).
  -P, --use-ps              use ps command to find busy thread(cpu usage)
                            instead of top command.
                            default use top command, because cpu usage of
                            ps command is expressed as the percentage of
                            time spent running during the *entire lifetime*
                            of a process, this is not ideal in general.
Miscellaneous:
  -h, --help                display this help and exit.
EOF
  } >&"$out_fd"

  exit "$exit_code"
}

################################################################################
# Check os support
################################################################################

[[ "$(uname)" == Linux* ]] || die "$PROG only support Linux, not support $(uname) yet!"

################################################################################
# parse options
################################################################################

# NOTE: ARGS can not be declared as readonly!!
# readonly declaration make exit code of assignment to be always 0, aka. the exit code of `getopt` in subshell is discarded.
# tested on bash 4.2.46
ARGS=$(getopt -n "$PROG" -a -o p:c:a:s:S:Pd:Fmlh \
  -l count:,pid:,append-file:,jstack-path:,store-dir:,use-ps,top-delay:,force,mix-native-frames,lock-info,help \
  -- "$@") || {
  echo
  usage 1
}
# getopt emits a shell-quoted argument list; eval is the documented way to load it back.
eval set -- "${ARGS}"

while true; do
  case "$1" in
    -c|--count)
      count="$2"
      shift 2
      ;;
    -p|--pid)
      pid="$2"
      shift 2
      ;;
    -a|--append-file)
      append_file="$2"
      shift 2
      ;;
    -s|--jstack-path)
      jstack_path="$2"
      shift 2
      ;;
    -S|--store-dir)
      store_dir="$2"
      shift 2
      ;;
    -P|--use-ps)
      use_ps=true
      shift
      ;;
    -d|--top-delay)
      top_delay="$2"
      shift 2
      ;;
    -F|--force)
      force=-F
      shift
      ;;
    -m|--mix-native-frames)
      mix_native_frames=-m
      shift
      ;;
    -l|--lock-info)
      more_lock_info=-l
      shift
      ;;
    -h|--help)
      usage
      ;;
    --)
      shift
      break
      ;;
    *)
      # getopt should never hand over anything else; fail instead of looping forever
      die "unexpected option: $1"
      ;;
  esac
done

count=${count:-5}

# positional arguments: [delay [count]], in the style of vmstat
update_delay=${1:-0}
if [[ -z "$1" ]]; then
  # no delay given: run exactly once
  update_count=1
else
  # delay given without count: 0 means "repeat until interrupted"
  update_count=${2:-0}
fi
(( update_count < 0 )) && update_count=0

top_delay=${top_delay:-0.5}
use_ps=${use_ps:-false}

# check the directory of append-file(-a) mode, create if not exist.
if [[ -n "$append_file" ]]; then
  if [[ -e "$append_file" ]]; then
    [[ -f "$append_file" ]] || die "$append_file(specified by option -a, for storing run output files) exists but is not a file!"
    [[ -w "$append_file" ]] || die "file $append_file(specified by option -a, for storing run output files) exists but is not writable!"
  else
    append_file_dir="$(dirname "$append_file")"
    if [[ -e "$append_file_dir" ]]; then
      [[ -d "$append_file_dir" ]] || die "directory $append_file_dir(specified by option -a, for storing run output files) exists but is not a directory!"
      [[ -w "$append_file_dir" ]] || die "directory $append_file_dir(specified by option -a, for storing run output files) exists but is not writable!"
    else
      mkdir -p "$append_file_dir" || die "fail to create directory $append_file_dir(specified by option -a, for storing run output files)!"
    fi
  fi
fi

# check store directory(-S) mode, create directory if not exist.
if [[ -n "$store_dir" ]]; then
  if [[ -e "$store_dir" ]]; then
    [[ -d "$store_dir" ]] || die "$store_dir(specified by option -S, for storing output files) exists but is not a directory!"
    [[ -w "$store_dir" ]] || die "directory $store_dir(specified by option -S, for storing output files) exists but is not writable!"
  else
    mkdir -p "$store_dir" || die "fail to create directory $store_dir(specified by option -S, for storing output files)!"
  fi
fi

################################################################################
# check the existence of jstack command
################################################################################

if [[ -n "$jstack_path" ]]; then
  [[ -f "$jstack_path" ]] || die "$jstack_path is NOT found!"
  [[ -x "$jstack_path" ]] || die "$jstack_path is NOT executable!"
elif command -v jstack &> /dev/null; then
  jstack_path="$(command -v jstack)"
else
  [[ -n "$JAVA_HOME" ]] || die "jstack not found on PATH and No JAVA_HOME setting! Use -s option set jstack path manually."
  [[ -f "$JAVA_HOME/bin/jstack" ]] || die "jstack not found on PATH and \$JAVA_HOME/bin/jstack($JAVA_HOME/bin/jstack) file does NOT exist! Use -s option set jstack path manually."
  [[ -x "$JAVA_HOME/bin/jstack" ]] || die "jstack not found on PATH and \$JAVA_HOME/bin/jstack($JAVA_HOME/bin/jstack) is NOT executable! Use -s option set jstack path manually."
  jstack_path="$JAVA_HOME/bin/jstack"
fi

################################################################################
# biz logic
################################################################################

readonly run_timestamp="$(date "+%Y-%m-%d_%H:%M:%S.%N")"

# Private scratch directory; mktemp creates it atomically with an unpredictable name.
tmp_store_dir="$(mktemp -d "/tmp/${PROG}_${run_timestamp}_XXXXXXXX")" ||
  die "fail to create temporary directory under /tmp!"
readonly tmp_store_dir

# Prefix of every intermediate file of this run (ps/top/jstack output, run log).
if [[ -n "$store_dir" ]]; then
  readonly store_file_prefix="$store_dir/${run_timestamp}_"
else
  readonly store_file_prefix="$tmp_store_dir/${run_timestamp}_"
fi

# Remove the scratch directory; files under the -S store dir are kept.
cleanupWhenExit() {
  rm -rf -- "${tmp_store_dir:?}" &> /dev/null
}
trap "cleanupWhenExit" EXIT

# Print the banner of one update round: timestamp, round index and the command line.
# Globals: i (round index, set by the caller), update_count, COMMAND_LINE (read)
headInfo() {
  local -r separator_line="================================================================================"

  colorEcho "0;34;42" "$separator_line"
  echo "$(date "+%Y-%m-%d %H:%M:%S.%N") [$(( i + 1 ))/$update_count]: ${COMMAND_LINE[*]}"
  colorEcho "0;34;42" "$separator_line"
  echo
}

# ps process selection: the pid given by -p, or every java/jsvc process.
if [[ -n "${pid}" ]]; then
  readonly -a ps_process_select_options=(-p "$pid")
else
  readonly -a ps_process_select_options=(-C java -C jsvc)
fi

# output field: pid, thread id(lwp), pcpu, user
# order by pcpu(percentage of cpu usage)
findBusyJavaThreadsByPs() {
  # 1. sort by %cpu by ps option `--sort -pcpu`
  # 2. use wide output(unlimited width) by ps option `-ww`
  #    avoid truncating user column to username_fo+ or $uid alike
  local -a ps_cmd_line=(ps "${ps_process_select_options[@]}" -wwLo pid,lwp,pcpu,user --sort -pcpu --no-headers)
  local ps_out
  ps_out="$("${ps_cmd_line[@]}")"
  if [[ -n "$store_dir" ]]; then
    printf '%s\n' "$ps_out" | logAndCat "${ps_cmd_line[@]}" > "${store_file_prefix}$(( i + 1 ))_ps"
  fi

  # emit nothing (instead of one blank record) when ps found no thread
  if [[ -n "$ps_out" ]]; then
    printf '%s\n' "$ps_out" | head -n "${count}"
  fi
}

# top with output field: thread id, %cpu
__top_threadId_cpu() {
  # 1. sort by %cpu by top option `-o %CPU`
  #    unfortunately, top version 3.2 does not support -o option(supports from top version 3.3+),
  #    use
  #       HOME="$tmp_store_dir" top -H -b -n 1
  #    combined
  #       sort
  #    instead of
  #       HOME="$tmp_store_dir" top -H -b -n 1 -o '%CPU'
  # 2. change HOME env var when run top,
  #    so as to prevent top command output format being change by .toprc user config file unexpectedly
  # 3. use option `-d 0.5`(update interval 0.5 second) and `-n 2`(update 2 times),
  #    and use second time update data to get cpu percentage of thread in 0.5 second interval
  # 4. top v3.3, there is 1 blank line between 2 update;
  #    but top v3.2, there is 2 blank lines between 2 update!
  local -a top_cmd_line=(top -H -b -d "$top_delay" -n 2)
  local top_out
  top_out="$(HOME="$tmp_store_dir" "${top_cmd_line[@]}")"
  if [[ -n "$store_dir" ]]; then
    printf '%s\n' "$top_out" | logAndCat "${top_cmd_line[@]}" > "${store_file_prefix}$(( i + 1 ))_top"
  fi

  printf '%s\n' "$top_out" |
    awk 'BEGIN { blockIndex = 0; currentLineHasText = 0; prevLineHasText = 0; } {
        currentLineHasText = ($0 != "")
        if (prevLineHasText && !currentLineHasText)
            blockIndex++    # from text line to empty line, increase block index
        if (blockIndex == 3 && ($NF == "java" || $NF == "jsvc"))   # $NF(last field) is command field
            # only print 4th text block(blockIndex == 3), aka. process info of second top update
            print $1 " " $9     # $1 is thread id field, $9 is %cpu field
        prevLineHasText = currentLineHasText    # update prevLineHasText
    }' | sort -k2,2nr
}

# Read "threadId pcpu" records from stdin and complete them with pid and user from ps.
# Records whose thread is not among the selected processes are dropped;
# stops after $count records have been written.
# output field: pid, threadId, pcpu, user
__complete_pid_user_by_ps() {
  # ps output field: pid, thread id(lwp), user
  local -a ps_cmd_line=(ps "${ps_process_select_options[@]}" -wwLo pid,lwp,user --no-headers)
  local ps_out
  ps_out="$("${ps_cmd_line[@]}")"
  if [[ -n "$store_dir" ]]; then
    printf '%s\n' "$ps_out" | logAndCat "${ps_cmd_line[@]}" > "${store_file_prefix}$(( i + 1 ))_ps"
  fi

  local idx=0
  local -a line
  local threadId pcpu output_fields
  while IFS=" " read -r -a line; do
    (( idx < count )) || break

    threadId="${line[0]}"
    pcpu="${line[1]}"

    # look the thread up in the ps snapshot; the first match wins
    output_fields="$(
      printf '%s\n' "$ps_out" |
        awk -v "threadId=$threadId" -v "pcpu=$pcpu" '$2==threadId {
            printf "%s %s %s %s\n", $1, threadId, pcpu, $3; exit
        }'
    )"
    if [[ -n "$output_fields" ]]; then
      (( idx++ ))
      echo "$output_fields"
    fi
  done
}

# output format is same as function findBusyJavaThreadsByPs
findBusyJavaThreadsByTop() {
  __top_threadId_cpu | __complete_pid_user_by_ps
}

# Read "pid threadId pcpu user" records from stdin and print the stack of each thread.
# jstack runs once per process and update round; its output is cached in
# "${store_file_prefix}<round>_jstack_<pid>" and the thread's stack is cut out with sed.
# Globals:   i, store_file_prefix, jstack_path, force, mix_native_frames, more_lock_info,
#            USER, COMMAND_LINE, append_file, store_dir, PROG (all read)
# Outputs:   thread stacks on stdout, also appended to the -a file / -S run log when set
printStackOfThreads() {
  local -a line
  local -a jstack_cmd_line
  local idx=0
  local pid threadId threadId0x pcpu user jstackFile sed_script

  while IFS=" " read -r -a line; do
    # skip blank or incomplete records instead of running jstack with an empty pid
    (( ${#line[@]} >= 4 )) || continue

    pid="${line[0]}"
    threadId="${line[1]}"
    # jstack reports the native thread id in hex (nid=0x...)
    threadId0x="0x$(printf %x "${threadId}")"
    pcpu="${line[2]}"
    user="${line[3]}"

    (( idx++ ))
    jstackFile="${store_file_prefix}$(( i + 1 ))_jstack_${pid}"
    if [[ ! -f "${jstackFile}" ]]; then
      # the option variables are intentionally unquoted: an empty one must vanish
      jstack_cmd_line=( "$jstack_path" ${force} $mix_native_frames $more_lock_info "${pid}" )
      if [[ "${user}" == "${USER}" ]]; then
        # run without sudo, when java process user is current user
        logAndRun "${jstack_cmd_line[@]}" > "${jstackFile}"
      elif (( UID == 0 )); then
        # if java process user is not current user, must run jstack with sudo
        logAndRun sudo -u "${user}" "${jstack_cmd_line[@]}" > "${jstackFile}"
      else
        # current user is not root user, so can not run with sudo; print error message and rerun suggestion
        redPrint "[$idx] Fail to jstack busy(${pcpu}%) thread(${threadId}/${threadId0x}) stack of java process(${pid}) under user(${user})."
        redPrint "User of java process($user) is not current user($USER), need sudo to rerun:"
        yellowPrint "    sudo ${COMMAND_LINE[*]}"
        normalPrint
        continue
      fi || {
        # jstack (or the redirection) failed: report, drop the partial file, go on with next thread
        redPrint "[$idx] Fail to jstack busy(${pcpu}%) thread(${threadId}/${threadId0x}) stack of java process(${pid}) under user(${user})."
        normalPrint
        rm -f -- "${jstackFile}" &> /dev/null
        continue
      }
    fi

    bluePrint "[$idx] Busy(${pcpu}%) thread(${threadId}/${threadId0x}) stack of java process(${pid}) under user(${user}):"

    # Each sed program prints (with -n) only the section of the selected thread.
    if [[ -n "$mix_native_frames" ]]; then
      # mixed mode: sections are delimited by "--------------- <tid> ---------------" lines;
      # skip the opening separator, delete the closing one, print the rest
      sed_script="/--------------- $threadId ---------------/,/^---------------/ {
        /--------------- $threadId ---------------/b
        /^---------------/d
        p
      }"
    elif [[ -n "$force" ]]; then
      # force mode: section starts with "Thread <tid>:" and ends with an empty line (not printed)
      sed_script="/^Thread ${threadId}:/,/^$/ {
        /^$/d
        p
      }"
    else
      # normal mode: section starts at the line containing " nid=<hex tid> "
      # and ends with an empty line (not printed)
      sed_script="/ nid=${threadId0x} /,/^$/ {
        /^$/d
        p
      }"
    fi
    {
      sed -n "$sed_script" "${jstackFile}"
      echo
    } | tee ${append_file:+-a "$append_file"} ${store_dir:+-a "${store_file_prefix}$PROG"}
  done
}

################################################################################
# Main
################################################################################

# Run the update rounds: find the busy threads (by top, or by ps with -P) and print their stacks.
# Globals: update_count, update_delay, use_ps, append_file, store_dir,
#          store_file_prefix, PROG (all read)
main() {
  local i
  # an unset/empty update_count means a single round, so the loop can not spin forever by accident
  local -r total_rounds="${update_count:-1}"

  # if update_count <= 0, infinite loop till user interrupted (eg: CTRL+C)
  for (( i = 0; total_rounds <= 0 || i < total_rounds; ++i )); do
    (( i > 0 )) && sleep "$update_delay"

    # the banner always goes to the log files; to the console only in multi-round mode
    if [[ -n "$append_file" || -n "$store_dir" ]]; then
      headInfo | tee ${append_file:+-a "$append_file"} ${store_dir:+-a "${store_file_prefix}$PROG"} > /dev/null
    fi
    (( total_rounds != 1 )) && headInfo

    if [[ "${use_ps:-false}" == true ]]; then
      findBusyJavaThreadsByPs
    else
      findBusyJavaThreadsByTop
    fi | printStackOfThreads
  done
}

main