#!/bin/bash # # funcslower - trace kernel functions slower than a threshold (microseconds). # Uses Linux ftrace. # # This uses the Linux ftrace function graph profiler to time kernel functions # and filter them based on a latency threshold. This is a proof of concept using # Linux ftrace capabilities on older kernels. # # USAGE: funcslower [-aChHPt] [-p PID] [-d secs] funcstring latency_us # # Run "funcslower -h" for full usage. # # REQUIREMENTS: FTRACE function graph, which you may already have available # and enabled in recent kernels. And awk. # # The output format is the same as the ftrace function graph trace format, # described in the kernel source under Documentation/trace/ftrace.txt. # Note that the output may be shuffled when different CPU buffers are read; # check the CPU column for changes, or include timestamps (-t) and post sort. # # WARNING: This uses dynamic tracing of kernel functions, and could cause # kernel panics or freezes. Test, and know what you are doing, before use. # # OVERHEADS: Timing and filtering is performed in-kernel context, costing # lower overheads than post-processing in user space. If you trace frequent # events (eg, pick a common function and a low threshold), you might want to # try the "-d secs" option, which buffers events in-kernel instead of printing # them live. # # From perf-tools: https://github.com/brendangregg/perf-tools # # COPYRIGHT: Copyright (c) 2014 Brendan Gregg. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # (http://www.gnu.org/copyleft/gpl.html) # # 12-Jul-2014 Brendan Gregg Created this. ### default variables setglobal tracing = '/sys/kernel/debug/tracing' setglobal flock = '/var/tmp/.ftrace-lock' setglobal opt_duration = '0'; setglobal duration = ''; setglobal opt_pid = '0'; setglobal pid = ''; setglobal opt_tid = '0'; setglobal tid = '' setglobal pidtext = ''; setglobal opt_headers = '0'; setglobal opt_proc = '0'; setglobal opt_time = '0'; setglobal opt_cpu = '0' trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section proc usage { cat <<-END >&2 USAGE: funcslower [-aChHPt] [-p PID] [-L TID] [-d secs] funcstring latency_us -a # all info (same as -HPt) -C # measure on-CPU time only -d seconds # trace duration, and use buffers -h # this usage message -H # include column headers -p PID # trace when this pid is on-CPU -L TID # trace when this thread is on-CPU -P # show process names & PIDs -t # show timestamps eg, funcslower vfs_read 10000 # trace vfs_read() slower than 10 ms See the man page and example file for more info. END exit } proc warn { if ! eval @ARGV { echo >&2 "WARNING: command failed \"$ifsjoin(ARGV)\"" } } proc end { # disable tracing echo 2>/dev/null echo "Ending tracing..." 2>/dev/null cd $tracing sh-expr ' opt_time )'&& warn "echo nofuncgraph-abstime > trace_options" sh-expr ' opt_proc )'&& warn "echo nofuncgraph-proc > trace_options" sh-expr ' opt_cpu )'&& warn "echo sleep-time > trace_options" warn "echo nop > current_tracer" sh-expr ' opt_pid )'&& warn "echo > set_ftrace_pid" warn "echo > set_ftrace_filter" warn "echo > set_graph_function" warn "echo 0 > tracing_thresh" warn "echo > trace" sh-expr ' wroteflock )'&& warn "rm $flock" } proc die { echo >&2 @ARGV exit 1 } proc edie { # die with a quiet end() echo >&2 @ARGV exec >/dev/null 2>&1 end exit 1 } ### process options while getopts aCd:hHp:L:Pt opt { match $opt { with a setglobal opt_headers = '1'; setglobal opt_proc = '1'; setglobal opt_time = '1' with C setglobal opt_cpu = '1'; with d setglobal opt_duration = '1'; setglobal duration = $OPTARG with p setglobal opt_pid = '1'; setglobal pid = $OPTARG with L setglobal opt_tid = '1'; setglobal tid = $OPTARG with H setglobal opt_headers = '1'; with P setglobal opt_proc = '1'; with t setglobal opt_time = '1'; with h|? usage } } shift $shExpr(' $OPTIND - 1 ') ### option logic sh-expr ' $# < 2 )'&& usage sh-expr ' opt_pid && opt_tid )'&& edie "ERROR: You can use -p or -L but not both." setglobal funcs = $1 shift setglobal thresh = $1 sh-expr ' opt_pid )'&& setglobal pidtext = "" for PID $pid"" sh-expr ' opt_tid )'&& setglobal pidtext = "" for TID $tid"" printf "Tracing \"$funcs\"$pidtext slower than $thresh us" if sh-expr ' opt_duration ' { echo " for $duration seconds..." } else { echo "... Ctrl-C to end." } ## select awk if sh-expr ' opt_duration ' { [[ -x /usr/bin/mawk ]] && setglobal awk = 'mawk' || setglobal awk = 'awk' } else { # workarounds for mawk/gawk fflush behavior if [[ -x /usr/bin/gawk ]] { setglobal awk = 'gawk' } elif [[ -x /usr/bin/mawk ]] { setglobal awk = '"mawk -W interactive'" } else { setglobal awk = 'awk' } } ### check permissions cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" ### ftrace lock [[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $[cat $flock] $flock" echo $Pid > $flock || die "ERROR: unable to write $flock." setglobal wroteflock = '1' ### setup and commence tracing sysctl -q kernel.ftrace_enabled=1 # doesn't set exit status read mode < current_tracer [[ "$mode" != "nop" ]] && edie "ERROR: ftrace active (current_tracer=$mode)" if ! echo $thresh > tracing_thresh { edie "ERROR: setting tracing_thresh to $thresh. Exiting." } if sh-expr ' opt_pid ' { echo '' > set_ftrace_pid # ftrace expects kernel pids, which are thread ids for tid in [/proc/$pid/task/*] { if ! echo $(tid##*/) >> set_ftrace_pid { edie "ERROR: setting -p $pid (PID exist?). Exiting." } } } if sh-expr ' opt_tid ' { if ! echo $tid > set_ftrace_pid { edie "ERROR: setting -L $tid (TID exist?). Exiting." } } if ! echo $funcs > set_ftrace_filter { edie "ERROR: enabling \"$funcs\" filter. Function exist? Exiting." } if ! echo $funcs > set_graph_function { edie "ERROR: enabling \"$funcs\" graph. Exiting." } if ! echo function_graph > current_tracer { edie "ERROR: setting current_tracer to \"function_graph\". Exiting." } if sh-expr ' opt_cpu ' { if ! echo nosleep-time > trace_options { edie "ERROR: setting -C (nosleep-time). Exiting." } } # the following must be done after setting current_tracer if sh-expr ' opt_time ' { if ! echo funcgraph-abstime > trace_options { edie "ERROR: setting -t (funcgraph-abstime). Exiting." } } if sh-expr ' opt_proc ' { if ! echo funcgraph-proc > trace_options { edie "ERROR: setting -P (funcgraph-proc). Exiting." } } ### setup output filter setglobal cat = 'cat' if sh-expr ' opt_proc ' { # remove proc change entries, since PID is included. example: # ------------------------------------------ # 0) supervi-1699 => supervi-1693 # ------------------------------------------ # setglobal cat = "$awk' "/(^ ---|^$)/ || \$3 == \"=>\" { next } { print \$0 }""' } ### print trace buffer warn "echo > trace" if sh-expr ' opt_duration ' { sleep $duration if sh-expr ' opt_headers ' { $cat trace } else { $cat trace | grep -v '^#' } } else { # trace_pipe lack headers, so fetch them from trace sh-expr ' opt_headers )'&& cat trace eval $cat trace_pipe } ### end tracing end