#!/bin/bash
#
# killsnoop - trace kill() syscalls with signal/process details.
#             Written using Linux ftrace.
#
# This traces kill() syscalls, showing which process killed which pid and
# the return code (0 for success, -1 for error).
#
# This implementation is designed to work on older kernel versions, and without
# kernel debuginfo. It works by dynamic tracing of the return value of kill()
# and associating it with the previous kill() syscall entry.
# This approach is kernel version specific, and may not work on your version.
# It is a workaround, and proof of concept for ftrace, until more kernel tracing
# functionality is available.
#
# USAGE: ./killsnoop [-hst] [-d secs] [-p pid] [-n name]
#
# Run "killsnoop -h" for full usage.
#
# REQUIREMENTS: FTRACE and KPROBE CONFIG, syscalls:sys_enter_kill and
# syscalls:sys_exit_kill kernel tracepoints (you may already have these
# on recent kernels) and awk.
#
# From perf-tools: https://github.com/brendangregg/perf-tools
#
# See the killsnoop(8) man page (in perf-tools) for more info.
#
# COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
# COPYRIGHT: Copyright (c) 2014 Martin Probst.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software Foundation,
#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#  (http://www.gnu.org/copyleft/gpl.html)
#
# 20-Jul-2014	Brendan Gregg	Templated this.
# 13-Sep-2014	Martin Probst	Created this.

### default variables
tracing=/sys/kernel/debug/tracing
flock=/var/tmp/.ftrace-lock; wroteflock=0
opt_duration=0; duration=; opt_name=0; name=; opt_pid=0; pid=; ftext=
opt_time=0; opt_fancy=0
awk=awk     # fallback when neither mawk nor gawk is found below
kevent_entry=events/syscalls/sys_enter_kill
kevent_return=events/syscalls/sys_exit_kill
# The handler is a no-op: a signal interrupts the foreground pipeline and the
# script then continues, which sends execution to the end tracing section.
trap ':' INT QUIT TERM PIPE HUP

# usage: print the option summary to stderr and exit.
function usage {
    cat <<'END' >&2
USAGE: killsnoop [-hst] [-d secs] [-p PID] [-n name]
                 -d seconds      # trace duration, and use buffers
                 -n name         # process name to match
                 -p PID          # PID to match on kill issue
                 -t              # include time (seconds)
                 -s              # human readable signal names
                 -h              # this usage message
  eg,
       killsnoop                 # watch kill()s live (unbuffered)
       killsnoop -d 1            # trace 1 sec (buffered)
       killsnoop -p 181          # trace kill()s issued to PID 181 only

See the man page and example file for more info.
END
    exit
}

# warn: run a command string and print a warning on stderr if it fails.
# Arguments: the command line as text. It is passed through eval on purpose,
#            because callers embed redirections (eg, "echo 0 > file").
#            Only call this with strings built by this script.
function warn {
    if ! eval "$@"; then
        echo >&2 "WARNING: command failed \"$*\""
    fi
}

# end: disable both tracepoints, clear the trace buffer, and remove the
# ftrace lock file if this process created it.
function end {
    # disable tracing
    echo 2>/dev/null
    echo "Ending tracing..." 2>/dev/null
    # the paths below are relative to the tracing directory
    cd "$tracing"
    warn "echo 0 > $kevent_entry/enable"
    warn "echo 0 > $kevent_return/enable"
    warn "echo > trace"
    (( wroteflock )) && warn "rm $flock"
}

# die: print the arguments to stderr and exit with status 1.
function die {
    echo >&2 "$@"
    exit 1
}

# edie: print the arguments to stderr, then run end() with all further
# output discarded, and exit with status 1.
function edie {
    # die with a quiet end()
    echo >&2 "$@"
    exec >/dev/null 2>&1
    end
    exit 1
}

### process options
while getopts d:hn:p:st opt
do
    case $opt in
    d)      opt_duration=1; duration=$OPTARG ;;
    n)      opt_name=1; name=$OPTARG ;;
    p)      opt_pid=1; pid=$OPTARG ;;
    t)      opt_time=1 ;;
    s)      opt_fancy=1 ;;
    h|?)    usage ;;
    esac
done
shift $(( OPTIND - 1 ))
# no positional arguments are accepted
(( $# )) && usage

### option logic
(( opt_pid && opt_name )) && die "ERROR: use either -p or -n."
(( opt_pid )) && ftext=" issued to PID $pid"
(( opt_name )) && ftext=" issued by process name \"$name\""
if (( opt_duration )); then
    echo "Tracing kill()s$ftext for $duration seconds (buffered)..."
else
    echo "Tracing kill()s$ftext. Ctrl-C to end."
fi

### select awk
# workaround for mawk fflush(): use -W interactive when this mawk accepts it.
# The probe runs a trivial program with no input so it can neither block on
# stdin nor print a usage message.
if [[ -x /usr/bin/mawk ]]; then
    awk="mawk"
    if mawk -W interactive 'BEGIN { exit 0 }' </dev/null >/dev/null 2>&1; then
        awk="mawk -W interactive"
    fi
fi
# workaround for gawk strtonum()
[[ -x /usr/bin/gawk ]] && awk="gawk --non-decimal-data"

### check permissions
cd "$tracing" || die "ERROR: accessing tracing. Root user? Kernel has FTRACE?" \
    "debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"

### ftrace lock
[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat "$flock") $flock"
echo $$ > "$flock" || die "ERROR: unable to write $flock."
wroteflock=1

### setup and begin tracing
echo nop > current_tracer
if ! echo 1 > "$kevent_entry/enable"; then
    edie "ERROR: enabling kill() entry tracepoint. Exiting."
fi
if ! echo 1 > "$kevent_return/enable"; then
    edie "ERROR: enabling kill() return tracepoint. Exiting."
fi
(( opt_time )) && printf "%-16s " "TIMEs"
printf "%-16.16s %-6s %-8s %-10s %4s\n" "COMM" "PID" "TPID" "SIGNAL" "RETURN"

#
# Determine output format. It may be one of the following (newest first):
#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
# To differentiate between them, the number of header fields is counted,
# and an offset set, to skip the extra column when needed.
#
# $awk may hold a command plus flags (eg, "mawk -W interactive"), so it is
# expanded unquoted on purpose; plain awk is used if it was never set.
# shellcheck disable=SC2086
offset=$(${awk:-awk} 'BEGIN { o = 0; }
    $1 == "#" && $2 ~ /TASK/ && NF == 6 { o = 1; }
    $2 ~ /TASK/ { print o; exit }' trace)

### print trace buffer
warn "echo > trace"
# Producer: either sleep for the duration and dump the buffer, or stream
# trace_pipe live. Consumer: awk pairs each sys_kill entry event with the
# following return event from the same PID and prints one line per kill().
# shellcheck disable=SC2086
( if (( opt_duration )); then
    # wait then dump buffer
    sleep "$duration"
    cat trace
else
    # print buffer live
    cat trace_pipe
fi ) | ${awk:-awk} -v o="${offset:-0}" -v opt_name="$opt_name" -v name="$name" \
    -v opt_duration="$opt_duration" -v opt_time="$opt_time" \
    -v opt_pid="$pid" -v opt_fancy="$opt_fancy" '
    # fancy signal names (Linux numbering as used on x86 and ARM)
    BEGIN {
        signals[1] = "SIGHUP"
        signals[2] = "SIGINT"
        signals[3] = "SIGQUIT"
        signals[4] = "SIGILL"
        signals[5] = "SIGTRAP"
        signals[6] = "SIGABRT"
        signals[7] = "SIGBUS"
        signals[8] = "SIGFPE"
        signals[9] = "SIGKILL"
        signals[10] = "SIGUSR1"
        signals[11] = "SIGSEGV"
        signals[12] = "SIGUSR2"
        signals[13] = "SIGPIPE"
        signals[14] = "SIGALRM"
        signals[15] = "SIGTERM"
        signals[16] = "SIGSTKFLT"
        signals[17] = "SIGCHLD"
        signals[18] = "SIGCONT"
        signals[19] = "SIGSTOP"
        signals[20] = "SIGTSTP"
        signals[21] = "SIGTTIN"
        signals[22] = "SIGTTOU"
        signals[23] = "SIGURG"
        signals[24] = "SIGXCPU"
        signals[25] = "SIGXFSZ"
        signals[26] = "SIGVTALRM"
        signals[27] = "SIGPROF"
        signals[28] = "SIGWINCH"
        signals[29] = "SIGIO"
        signals[30] = "SIGPWR"
        signals[31] = "SIGSYS"
    }

    # common fields: split the leading TASK-PID field
    $1 != "#" {
        # task name can contain dashes, so strip only the trailing -PID
        comm = pid = $1
        sub(/-[0-9][0-9]*$/, "", comm)
        if (opt_name && match(comm, name) == 0)
            next
        sub(/.*-/, "", pid)
    }

    # sys_kill() entry: remember target PID and signal for the calling PID
    $1 != "#" && $(4+o) ~ /sys_kill/ && $(5+o) !~ /->/ {
        #
        # eg: ... sys_kill(pid:...
        #
        kpid = $(5+o)
        signal = $(7+o)
        sub(/,$/, "", kpid)
        sub(/\)$/, "", signal)
        # the tracepoint prints both values in hex without a prefix
        kpid = int("0x"kpid)
        signal = int("0x"signal)
        current[pid,"kpid"] = kpid
        current[pid,"signal"] = signal
    }

    # sys_kill exit: join with the stored entry and print one output line
    $1 != "#" && $(5+o) ~ /->/ {
        rv = int($NF)
        killed_pid = current[pid,"kpid"]
        signal = current[pid,"signal"]

        delete current[pid,"kpid"]
        delete current[pid,"signal"]

        # opt_pid carries the -p value; compare against the empty string
        # so that a filter on PID 0 is honoured rather than ignored
        if (opt_pid != "" && killed_pid != opt_pid)
            next

        if (opt_time) {
            time = $(3+o); sub(":", "", time)
            printf "%-16s ", time
        }

        # use "in" so that unknown signal numbers do not grow the table
        if (opt_fancy && (signal in signals))
            signal = signals[signal]

        printf "%-16.16s %-6s %-8s %-10s %-4s\n", comm, pid, killed_pid,
            signal, rv
    }

    $0 ~ /LOST.*EVENTS/ { print "WARNING: " $0 > "/dev/stderr" }
'

### end tracing
end