diff --git a/0001-iolatency-use-ftrace-instances.patch b/0001-iolatency-use-ftrace-instances.patch new file mode 100644 index 0000000..1554f6b --- /dev/null +++ b/0001-iolatency-use-ftrace-instances.patch @@ -0,0 +1,98 @@ +From 7dcf792f2ce19d427a1b6103727c20f6a507a6ea Mon Sep 17 00:00:00 2001 +From: Andreas Klinger +Date: Thu, 7 Jan 2021 10:31:04 +0100 +Subject: [PATCH 1/5] iolatency: use ftrace instances + +Make use of ftrace instances feature for being able to start several +ftrace scripts simultaneously. + +Signed-off-by: Andreas Klinger +--- + iolatency | 28 +++++++++++++++++++++------- + 1 file changed, 21 insertions(+), 7 deletions(-) + +diff --git a/iolatency b/iolatency +index d646193..50f5861 100755 +--- a/iolatency ++++ b/iolatency +@@ -40,8 +40,9 @@ + # 20-Jul-2014 Brendan Gregg Created this. + + ### default variables ++sname=iolatency + tracing=/sys/kernel/debug/tracing +-flock=/var/tmp/.ftrace-lock ++flock=/var/tmp/.$sname-lock + bufsize_kb=4096 + opt_device=0; device=; opt_iotype=0; iotype=; opt_timestamp=0 + opt_interval=0; interval=1; opt_count=0; count=0; opt_queue=0 +@@ -49,7 +50,7 @@ trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section + + function usage { + cat <<-END >&2 +- USAGE: iolatency [-hQT] [-d device] [-i iotype] [interval [count]] ++ USAGE: $sname [-hQT] [-d device] [-i iotype] [interval [count]] + -d device # device string (eg, "202,1) + -i iotype # match type (eg, '*R*' for all reads) + -Q # use queue insert as start time +@@ -58,11 +59,11 @@ function usage { + interval # summary interval, seconds (default 1) + count # number of summaries + eg, +- iolatency # summarize latency every second +- iolatency -Q # include block I/O queue time +- iolatency 5 2 # 2 x 5 second summaries +- iolatency -i '*R*' # trace reads +- iolatency -d 202,1 # trace device 202,1 only ++ $sname # summarize latency every second ++ $sname -Q # include block I/O queue time ++ $sname 5 2 # 2 x 5 second summaries ++ $sname -i '*R*' # trace reads ++ $sname -d 202,1 # trace device 202,1 only + + See the man page and example file for more info. + END +@@ -80,6 +81,7 @@ function end { + echo 2>/dev/null + echo "Ending tracing..." 2>/dev/null + cd $tracing ++ [[ -d instances ]] && cd instances && cd $sname + warn "echo 0 > events/block/$b_start/enable" + warn "echo 0 > events/block/block_rq_complete/enable" + if (( opt_device || opt_iotype )); then +@@ -87,6 +89,10 @@ function end { + warn "echo 0 > events/block/block_rq_complete/filter" + fi + warn "echo > trace" ++ ++ cd $tracing ++ [[ -d instances ]] && cd instances && rmdir $sname ++ + (( wroteflock )) && warn "rm $flock" + } + +@@ -103,6 +109,12 @@ function edie { + exit 1 + } + ++function use_instances { ++ cd instances ++ [[ -d $sname ]] || mkdir $sname ++ cd $sname ++} ++ + ### process options + while getopts d:hi:QT opt + do +@@ -142,6 +154,8 @@ fi + cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? + debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" + ++[[ -d instances ]] && use_instances ++ + ### ftrace lock + [[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" + echo $$ > $flock || die "ERROR: unable to write $flock." +-- +2.20.1 + diff --git a/0002-iosnoop-use-ftrace-instances.patch b/0002-iosnoop-use-ftrace-instances.patch new file mode 100644 index 0000000..ef76704 --- /dev/null +++ b/0002-iosnoop-use-ftrace-instances.patch @@ -0,0 +1,102 @@ +From 3a670066e0293eb436e672a7b2c538bad656819c Mon Sep 17 00:00:00 2001 +From: Andreas Klinger +Date: Thu, 7 Jan 2021 10:37:00 +0100 +Subject: [PATCH 2/5] iosnoop: use ftrace instances + +Make use of ftrace instances feature for being able to start several +ftrace scripts simultaneously. + +Signed-off-by: Andreas Klinger +--- + iosnoop | 32 +++++++++++++++++++++++--------- + 1 file changed, 23 insertions(+), 9 deletions(-) + +diff --git a/iosnoop b/iosnoop +index 603b3e8..555c080 100755 +--- a/iosnoop ++++ b/iosnoop +@@ -53,8 +53,9 @@ + # 12-Jul-2014 Brendan Gregg Created this. + + ### default variables ++sname=iosnoop + tracing=/sys/kernel/debug/tracing +-flock=/var/tmp/.ftrace-lock ++flock=/var/tmp/.$sname-lock + bufsize_kb=4096 + opt_duration=0; duration=; opt_name=0; name=; opt_pid=0; pid=; ftext= + opt_start=0; opt_end=0; opt_device=0; device=; opt_iotype=0; iotype= +@@ -63,7 +64,7 @@ trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section + + function usage { + cat <<-END >&2 +- USAGE: iosnoop [-hQst] [-d device] [-i iotype] [-p PID] [-n name] ++ USAGE: $sname [-hQst] [-d device] [-i iotype] [-p PID] [-n name] + [duration] + -d device # device string (eg, "202,1) + -i iotype # match type (eg, '*R*' for all reads) +@@ -75,13 +76,13 @@ function usage { + -h # this usage message + duration # duration seconds, and use buffers + eg, +- iosnoop # watch block I/O live (unbuffered) +- iosnoop 1 # trace 1 sec (buffered) +- iosnoop -Q # include queueing time in LATms +- iosnoop -ts # include start and end timestamps +- iosnoop -i '*R*' # trace reads +- iosnoop -p 91 # show I/O issued when PID 91 is on-CPU +- iosnoop -Qp 91 # show I/O queued by PID 91, queue time ++ $sname # watch block I/O live (unbuffered) ++ $sname 1 # trace 1 sec (buffered) ++ $sname -Q # include queueing time in LATms ++ $sname -ts # include start and end timestamps ++ $sname -i '*R*' # trace reads ++ $sname -p 91 # show I/O issued when PID 91 is on-CPU ++ $sname -Qp 91 # show I/O queued by PID 91, queue time + + See the man page and example file for more info. + END +@@ -99,6 +100,7 @@ function end { + echo 2>/dev/null + echo "Ending tracing..." 2>/dev/null + cd $tracing ++ [[ -d instances ]] && cd instances && cd $sname + warn "echo 0 > events/block/$b_start/enable" + warn "echo 0 > events/block/block_rq_complete/enable" + if (( opt_device || opt_iotype || opt_pid )); then +@@ -106,6 +108,10 @@ function end { + warn "echo 0 > events/block/block_rq_complete/filter" + fi + warn "echo > trace" ++ ++ cd $tracing ++ [[ -d instances ]] && cd instances && rmdir $sname ++ + (( wroteflock )) && warn "rm $flock" + } + +@@ -122,6 +128,12 @@ function edie { + exit 1 + } + ++function use_instances { ++ cd instances ++ [[ -d $sname ]] || mkdir $sname ++ cd $sname ++} ++ + ### process options + while getopts d:hi:n:p:Qst opt + do +@@ -172,6 +184,8 @@ wroteflock=1 + cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? + debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" + ++[[ -d instances ]] && use_instances ++ + ### ftrace lock + [[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" + echo $$ > $flock || die "ERROR: unable to write $flock." +-- +2.20.1 + diff --git a/0003-iosnoop-set-wroteflock-flag.patch b/0003-iosnoop-set-wroteflock-flag.patch new file mode 100644 index 0000000..b0de211 --- /dev/null +++ b/0003-iosnoop-set-wroteflock-flag.patch @@ -0,0 +1,32 @@ +From 881fd96c4bcf49d70c55b71e04dcc02a08d2bb44 Mon Sep 17 00:00:00 2001 +From: Andreas Klinger +Date: Thu, 7 Jan 2021 10:38:45 +0100 +Subject: [PATCH 3/5] iosnoop: set wroteflock flag + +Set a flag to indicate that file lock was written. Also add a warning +when switching off current_tracer. + +Signed-off-by: Andreas Klinger +--- + iosnoop | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/iosnoop b/iosnoop +index 555c080..4eccb33 100755 +--- a/iosnoop ++++ b/iosnoop +@@ -189,9 +189,10 @@ cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? + ### ftrace lock + [[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" + echo $$ > $flock || die "ERROR: unable to write $flock." ++wroteflock=1 + + ### setup and begin tracing +-echo nop > current_tracer ++warn "echo nop > current_tracer" + warn "echo $bufsize_kb > buffer_size_kb" + filter= + if (( opt_iotype )); then +-- +2.20.1 + diff --git a/0004-iosnoop-use-global-clock.patch b/0004-iosnoop-use-global-clock.patch new file mode 100644 index 0000000..bc2778c --- /dev/null +++ b/0004-iosnoop-use-global-clock.patch @@ -0,0 +1,30 @@ +From 50e8798ac41eca69f1554b315088f7fb164e8a6c Mon Sep 17 00:00:00 2001 +From: Andreas Klinger +Date: Thu, 7 Jan 2021 10:40:28 +0100 +Subject: [PATCH 4/5] iosnoop: use global clock + +Make usage of trace_clock global. This makes tracing output of different +cores compareable not only among each other but also to dmesg. + +Signed-off-by: Andreas Klinger +--- + iosnoop | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/iosnoop b/iosnoop +index 4eccb33..a6b1751 100755 +--- a/iosnoop ++++ b/iosnoop +@@ -186,6 +186,9 @@ cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? + + [[ -d instances ]] && use_instances + ++# for comparison with dmesg we use global clock ++warn "echo global > trace_clock" ++ + ### ftrace lock + [[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" + echo $$ > $flock || die "ERROR: unable to write $flock." +-- +2.20.1 + diff --git a/0005-mmcsnoop-added-for-tracing-mmc-operations.patch b/0005-mmcsnoop-added-for-tracing-mmc-operations.patch new file mode 100644 index 0000000..e18b5ac --- /dev/null +++ b/0005-mmcsnoop-added-for-tracing-mmc-operations.patch @@ -0,0 +1,328 @@ +From 4608bdab5541df83f12098f31925a0e3dc1cc4e7 Mon Sep 17 00:00:00 2001 +From: Andreas Klinger +Date: Wed, 6 Jan 2021 19:47:35 +0100 +Subject: [PATCH 5/5] mmcsnoop: added for tracing mmc operations + +Prints out relevant datas of mmc commands by using ftrace framework. + +In comparison to the iosnoop script which is tracing block i/o +operations there are a couple of informations special to mmc devices and +shown by this script: +- operation command (opcode) +- command argument +- command flags +- error code + +Make use of ftrace instances feature for being able to start several +ftrace scripts simultaneously. Use trace_clock global to be compareable +to dmesg and between cpu cores. + +Signed-off-by: Andreas Klinger +--- + mmcsnoop | 294 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 294 insertions(+) + create mode 100755 mmcsnoop + +diff --git a/mmcsnoop b/mmcsnoop +new file mode 100755 +index 0000000..4b07633 +--- /dev/null ++++ b/mmcsnoop +@@ -0,0 +1,294 @@ ++#!/bin/bash ++# ++# SPDX-License-Identifier: GPL-2.0+ ++# ++# mmcsnoop - trace mmc device I/O. ++# Written using Linux ftrace. ++# ++# This traces mmc I/O at the mmc device interface, using the mmc: ++# tracepoints. This can help characterize the mmc I/O requested for the ++# devices and their resulting performance. ++# ++# USAGE: ./mmcsnoop [-h] [-d device] [-p pid] [-n name] [duration] ++# ++# Run "mmcsnoop -h" for full usage. ++# ++# REQUIREMENTS: FTRACE CONFIG, mmc:mmc_request_* tracepoints (you may ++# already have these on recent kernels). ++# ++# OVERHEAD: By default, mmcsnoop works without buffering, printing I/O ++# events as they happen (uses trace_pipe), context switching and consuming ++# CPU to do so. This has a limit of about 10,000 IOPS (depending on your ++# platform), at which point mmcsnoop will be consuming 1 CPU. The duration ++# mode uses buffering, and can handle much higher IOPS rates, however, the ++# buffer has a limit of about 50,000 I/O, after which events will be ++# dropped. You can tune this with bufsize_kb, which is per-CPU. Also note ++# that the "-n" option is currently post-filtered, so all events are ++# traced. ++# ++# This was written as a proof of concept for ftrace. It would be better ++# written using perf_events (after some capabilities are added), which has ++# a better buffering policy, or a tracer such as SystemTap or ktap. ++# ++# Used script iosnoop with the following copyright notices: ++# ++# From perf-tools: https://github.com/brendangregg/perf-tools ++# ++# COPYRIGHT: Copyright (c) 2014 Brendan Gregg. ++# 12-Jul-2014 Brendan Gregg Created this. ++# ++# ++# Implementation of mmcsnoop: ++# 06-Jan-2021 Andreas Klinger ++ ++### default variables ++sname=mmcsnoop ++tracing=/sys/kernel/debug/tracing ++flock=/var/tmp/.$sname-lock ++bufsize_kb=4096 ++opt_duration=0; duration=; opt_name=0; name=; opt_pid=0; pid=; ftext= ++opt_device=0; device=; ++trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section ++ ++function usage { ++ cat <<-END >&2 ++ USAGE: $sname [-h] [-d device] [-p PID] [-n name] [duration] ++ -d device # device string (eg, "202,1) ++ -n name # process name to match on I/O issue ++ -p PID # PID to match on I/O issue ++ -h # this usage message ++ duration # duration seconds, and use buffers ++ eg, ++ $sname # watch mmc I/O live (unbuffered) ++ $sname 1 # trace 1 sec (buffered) ++ $sname -p 91 # show I/O issued when PID 91 is on-CPU ++ ++ See the man page and example file for more info. ++END ++ exit ++} ++ ++function warn { ++ if ! eval "$@"; then ++ echo >&2 "WARNING: command failed \"$@\"" ++ fi ++} ++ ++function end { ++ # disable tracing ++ echo 2>/dev/null ++ echo "Ending tracing..." 2>/dev/null ++ cd $tracing ++ [[ -d instances ]] && cd instances && cd $sname ++ warn "echo 0 > events/mmc/mmc_request_start/enable" ++ warn "echo 0 > events/mmc/mmc_request_done/enable" ++ if (( opt_device || opt_pid )); then ++ warn "echo 0 > events/mmc/mmc_request_start/filter" ++ warn "echo 0 > events/mmc/mmc_request_done/filter" ++ fi ++ warn "echo > trace" ++ ++ cd $tracing ++ [[ -d instances ]] && cd instances && rmdir $sname ++ ++ (( wroteflock )) && warn "rm $flock" ++} ++ ++function die { ++ echo >&2 "$@" ++ exit 1 ++} ++ ++function edie { ++ # die with a quiet end() ++ echo >&2 "$@" ++ exec >/dev/null 2>&1 ++ end ++ exit 1 ++} ++ ++function use_instances { ++ cd instances ++ [[ -d $sname ]] || mkdir $sname ++ cd $sname ++} ++ ++### process options ++while getopts d:hi:n:p:Qst opt ++do ++ case $opt in ++ d) opt_device=1; device=$OPTARG ;; ++ n) opt_name=1; name=$OPTARG ;; ++ p) opt_pid=1; pid=$OPTARG ;; ++ h|?) usage ;; ++ esac ++done ++shift $(( $OPTIND - 1 )) ++if (( $# )); then ++ opt_duration=1 ++ duration=$1 ++ shift ++fi ++if (( opt_device )); then ++ major=${device%,*} ++ minor=${device#*,} ++ dev=$(( (major << 20) + minor )) ++fi ++ ++### option logic ++(( opt_pid && opt_name )) && die "ERROR: use either -p or -n." ++(( opt_pid )) && ftext=" issued by PID $pid" ++(( opt_name )) && ftext=" issued by process name \"$name\"" ++if (( opt_duration )); then ++ echo "Tracing mmc I/O $ftext for $duration seconds (buffered)..." ++else ++ echo "Tracing mmc I/O $ftext. Ctrl-C to end." ++fi ++ ++### select awk ++(( opt_duration )) && use=mawk || use=gawk # workaround for mawk fflush() ++[[ -x /usr/bin/$use ]] && awk=$use || awk=awk ++wroteflock=1 ++ ++### check permissions ++cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? ++ debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" ++ ++[[ -d instances ]] && use_instances ++ ++# for comparison with dmesg we use global clock ++warn "echo global > trace_clock" ++ ++### ftrace lock ++[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" ++echo $$ > $flock || die "ERROR: unable to write $flock." ++wroteflock=1 ++ ++### setup and begin tracing ++warn "echo nop > current_tracer" ++warn "echo $bufsize_kb > buffer_size_kb" ++filter= ++if (( opt_device )); then ++ [[ "$filter" != "" ]] && filter="$filter && " ++ filter="${filter}name == \"$device\"" ++fi ++filter_i=$filter ++if (( opt_pid )); then ++ [[ "$filter_i" != "" ]] && filter_i="$filter_i && " ++ filter_i="${filter_i}common_pid == $pid" ++ [[ "$filter" == "" ]] && filter=0 ++fi ++if (( opt_device || opt_pid )); then ++ if ! echo "$filter_i" > events/mmc/mmc_request_start/filter || \ ++ ! echo "$filter" > events/mmc/mmc_request_done/filter ++ then ++ die "ERROR: setting -d or -t filter. Exiting." ++ fi ++fi ++if ! echo 1 > events/mmc/mmc_request_start/enable || \ ++ ! echo 1 > events/mmc/mmc_request_done/enable; then ++ die "ERROR: enabling mmc I/O tracepoints. Exiting." ++fi ++printf "%-14.14s %6s %6s %-18s %4s %10s %10s %10s %8s %8s %14s %14s\n" \ ++ "COMM" "PID" "DEV" "MRQ" "OPCD" "CMD_ARG" "CMD_FLAGS" \ ++ "CMD_ERR" "BYTES" "LAT[us]" "START-TS" "END-TS" ++ ++# ++# Determine output format. It may be one of the following (newest first): ++# TASK-PID CPU# |||| TIMESTAMP FUNCTION ++# TASK-PID CPU# TIMESTAMP FUNCTION ++# To differentiate between them, the number of header fields is counted, ++# and an offset set, to skip the extra column when needed. ++# ++offset=$($awk 'BEGIN { o = 0; } ++ $1 == "#" && $2 ~ /TASK/ && NF == 6 { o = 1; } ++ $2 ~ /TASK/ { print o; exit }' trace) ++ ++### print trace buffer ++warn "echo > trace" ++( if (( opt_duration )); then ++ # wait then dump buffer ++ sleep $duration ++ cat trace ++else ++ # print buffer live ++ cat trace_pipe ++fi ) | $awk -v o=$offset -v opt_name=$opt_name -v name=$name \ ++ -v opt_duration=$opt_duration ' ++ ++ # common fields ++ $1 != "#" { ++ # task name can contain dashes ++ comm = pid = $1 ++ sub(/-[0-9][0-9]*/, "", comm) ++ sub(/.*-/, "", pid) ++ time = $(3+o); sub(":", "", time) ++ dev = $(5+o) ++ sub(/:/, "", dev) ++ ++ sstr = "struct mmc_request[" ++ mrq0 = substr($0, index($0, sstr) + length(sstr)) ++ mrq = substr(mrq0, 0, index(mrq0, "]") - 1) ++ ++ opcode = substr($0, index($0, "opcode=") + 7) ++ sub(/ .*/, "", opcode) ++ } ++ ++ # mmc request start ++ $1 != "#" && $0 ~ /mmc_request_start/ { ++ if (opt_name && match(comm, name) == 0) ++ next ++ ++ cmd_arg = substr($0, index($0, "cmd_arg=") + 8) ++ sub(/ .*/, "", cmd_arg) ++ cmd_args[mrq] = cmd_arg ++ ++ cmd_flag = substr($0, index($0, "cmd_flags=") + 10) ++ sub(/ .*/, "", cmd_flag) ++ cmd_flags[mrq] = cmd_flag ++ ++ stime[mrq] = time ++ comms[mrq] = comm ++ pids[mrq] = pid ++ next ++ } ++ ++ # mmc request done ++ $1 != "#" && $0 ~ /mmc_request_done/ { ++ if (stime[mrq] > 0) { ++ etime[mrq] = time ++ latency = (etime[mrq] - stime[mrq]) * 1000000 ++ ++ cmd_err = substr($0, index($0, "cmd_err=") + 8) ++ sub(/ .*/, "", cmd_err) ++ ++ bytes_xfered = substr($0, index($0, "bytes_xfered=") + 13) ++ sub(/ .*/, "", bytes_xfered) ++ ++ printf "%-14.14s %6s %6s %-18s %4d %10s %10s %10s %8d %8d %14.6f %14.6f\n", ++ comms[mrq], pids[mrq], dev, mrq, opcode, ++ cmd_args[mrq], cmd_flags[mrq], cmd_err, ++ bytes_xfered, latency, stime[mrq], etime[mrq] ++ if (!opt_duration) ++ fflush() ++ ++ delete comms[mrq] ++ delete pids[mrq] ++ delete stime[mrq] ++ delete etime[mrq] ++ delete cmd_args[mrq] ++ delete cmd_flags[mrq] ++ } ++ next ++ } ++ ++ $0 ~ /LOST.*EVENTS/ { print "WARNING: " $0 > "/dev/stderr" } ++' ++ ++### end tracing ++end ++ ++# example output of ftrace ++# ++# kworker/2:1H-2100 [002] .... 615.658880: mmc_request_start: mmc1: start struct mmc_request[0000000097276643]: cmd_opcode=25 cmd_arg=0x574940 cmd_flags=0xb5 cmd_retries=0 stop_opcode=12 stop_arg=0x0 stop_flags=0x49d stop_retries=0 sbc_opcode=23 sbc_arg=0x320 sbc_flags=0x15 sbc_retires=0 blocks=800 block_size=512 blk_addr=5720384 data_flags=0x100 tag=1 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=1 retune_period=0 ++# -0 [000] ..s1 615.690018: mmc_request_done: mmc1: end struct mmc_request[0000000097276643]: cmd_opcode=25 cmd_err=0 cmd_resp=0x900 0x0 0x0 0x0 cmd_retries=0 stop_opcode=12 stop_err=0 stop_resp=0x0 0x0 0x0 0x0 stop_retries=0 sbc_opcode=23 sbc_err=0 sbc_resp=0x0 0x0 0x0 0x0 sbc_retries=0 bytes_xfered=409600 data_err=0 tag=1 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=1 retune_period=0 +-- +2.20.1 + diff --git a/iolatency b/iolatency index d646193..50f5861 100755 --- a/iolatency +++ b/iolatency @@ -40,8 +40,9 @@ # 20-Jul-2014 Brendan Gregg Created this. ### default variables +sname=iolatency tracing=/sys/kernel/debug/tracing -flock=/var/tmp/.ftrace-lock +flock=/var/tmp/.$sname-lock bufsize_kb=4096 opt_device=0; device=; opt_iotype=0; iotype=; opt_timestamp=0 opt_interval=0; interval=1; opt_count=0; count=0; opt_queue=0 @@ -49,7 +50,7 @@ trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section function usage { cat <<-END >&2 - USAGE: iolatency [-hQT] [-d device] [-i iotype] [interval [count]] + USAGE: $sname [-hQT] [-d device] [-i iotype] [interval [count]] -d device # device string (eg, "202,1) -i iotype # match type (eg, '*R*' for all reads) -Q # use queue insert as start time @@ -58,11 +59,11 @@ function usage { interval # summary interval, seconds (default 1) count # number of summaries eg, - iolatency # summarize latency every second - iolatency -Q # include block I/O queue time - iolatency 5 2 # 2 x 5 second summaries - iolatency -i '*R*' # trace reads - iolatency -d 202,1 # trace device 202,1 only + $sname # summarize latency every second + $sname -Q # include block I/O queue time + $sname 5 2 # 2 x 5 second summaries + $sname -i '*R*' # trace reads + $sname -d 202,1 # trace device 202,1 only See the man page and example file for more info. END @@ -80,6 +81,7 @@ function end { echo 2>/dev/null echo "Ending tracing..." 2>/dev/null cd $tracing + [[ -d instances ]] && cd instances && cd $sname warn "echo 0 > events/block/$b_start/enable" warn "echo 0 > events/block/block_rq_complete/enable" if (( opt_device || opt_iotype )); then @@ -87,6 +89,10 @@ function end { warn "echo 0 > events/block/block_rq_complete/filter" fi warn "echo > trace" + + cd $tracing + [[ -d instances ]] && cd instances && rmdir $sname + (( wroteflock )) && warn "rm $flock" } @@ -103,6 +109,12 @@ function edie { exit 1 } +function use_instances { + cd instances + [[ -d $sname ]] || mkdir $sname + cd $sname +} + ### process options while getopts d:hi:QT opt do @@ -142,6 +154,8 @@ fi cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" +[[ -d instances ]] && use_instances + ### ftrace lock [[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" echo $$ > $flock || die "ERROR: unable to write $flock." diff --git a/iosnoop b/iosnoop index 603b3e8..a6b1751 100755 --- a/iosnoop +++ b/iosnoop @@ -53,8 +53,9 @@ # 12-Jul-2014 Brendan Gregg Created this. ### default variables +sname=iosnoop tracing=/sys/kernel/debug/tracing -flock=/var/tmp/.ftrace-lock +flock=/var/tmp/.$sname-lock bufsize_kb=4096 opt_duration=0; duration=; opt_name=0; name=; opt_pid=0; pid=; ftext= opt_start=0; opt_end=0; opt_device=0; device=; opt_iotype=0; iotype= @@ -63,7 +64,7 @@ trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section function usage { cat <<-END >&2 - USAGE: iosnoop [-hQst] [-d device] [-i iotype] [-p PID] [-n name] + USAGE: $sname [-hQst] [-d device] [-i iotype] [-p PID] [-n name] [duration] -d device # device string (eg, "202,1) -i iotype # match type (eg, '*R*' for all reads) @@ -75,13 +76,13 @@ function usage { -h # this usage message duration # duration seconds, and use buffers eg, - iosnoop # watch block I/O live (unbuffered) - iosnoop 1 # trace 1 sec (buffered) - iosnoop -Q # include queueing time in LATms - iosnoop -ts # include start and end timestamps - iosnoop -i '*R*' # trace reads - iosnoop -p 91 # show I/O issued when PID 91 is on-CPU - iosnoop -Qp 91 # show I/O queued by PID 91, queue time + $sname # watch block I/O live (unbuffered) + $sname 1 # trace 1 sec (buffered) + $sname -Q # include queueing time in LATms + $sname -ts # include start and end timestamps + $sname -i '*R*' # trace reads + $sname -p 91 # show I/O issued when PID 91 is on-CPU + $sname -Qp 91 # show I/O queued by PID 91, queue time See the man page and example file for more info. END @@ -99,6 +100,7 @@ function end { echo 2>/dev/null echo "Ending tracing..." 2>/dev/null cd $tracing + [[ -d instances ]] && cd instances && cd $sname warn "echo 0 > events/block/$b_start/enable" warn "echo 0 > events/block/block_rq_complete/enable" if (( opt_device || opt_iotype || opt_pid )); then @@ -106,6 +108,10 @@ function end { warn "echo 0 > events/block/block_rq_complete/filter" fi warn "echo > trace" + + cd $tracing + [[ -d instances ]] && cd instances && rmdir $sname + (( wroteflock )) && warn "rm $flock" } @@ -122,6 +128,12 @@ function edie { exit 1 } +function use_instances { + cd instances + [[ -d $sname ]] || mkdir $sname + cd $sname +} + ### process options while getopts d:hi:n:p:Qst opt do @@ -172,12 +184,18 @@ wroteflock=1 cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" +[[ -d instances ]] && use_instances + +# for comparison with dmesg we use global clock +warn "echo global > trace_clock" + ### ftrace lock [[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" echo $$ > $flock || die "ERROR: unable to write $flock." +wroteflock=1 ### setup and begin tracing -echo nop > current_tracer +warn "echo nop > current_tracer" warn "echo $bufsize_kb > buffer_size_kb" filter= if (( opt_iotype )); then diff --git a/mmcsnoop b/mmcsnoop new file mode 100755 index 0000000..4131852 --- /dev/null +++ b/mmcsnoop @@ -0,0 +1,310 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0+ +# +# mmcsnoop - trace mmc device I/O. +# Written using Linux ftrace. +# +# This traces mmc I/O at the mmc device interface, using the mmc: +# tracepoints. This can help characterize the mmc I/O requested for the +# devices and their resulting performance. +# +# USAGE: ./mmcsnoop [-h] [-d device] [-p pid] [-n name] [duration] +# +# Run "mmcsnoop -h" for full usage. +# +# REQUIREMENTS: FTRACE CONFIG, mmc:mmc_request_* tracepoints (you may +# already have these on recent kernels). +# +# OVERHEAD: By default, mmcsnoop works without buffering, printing I/O +# events as they happen (uses trace_pipe), context switching and consuming +# CPU to do so. This has a limit of about 10,000 IOPS (depending on your +# platform), at which point mmcsnoop will be consuming 1 CPU. The duration +# mode uses buffering, and can handle much higher IOPS rates, however, the +# buffer has a limit of about 50,000 I/O, after which events will be +# dropped. You can tune this with bufsize_kb, which is per-CPU. Also note +# that the "-n" option is currently post-filtered, so all events are +# traced. +# +# This was written as a proof of concept for ftrace. It would be better +# written using perf_events (after some capabilities are added), which has +# a better buffering policy, or a tracer such as SystemTap or ktap. +# +# Used script iosnoop with the following copyright notices: +# +# From perf-tools: https://github.com/brendangregg/perf-tools +# +# COPYRIGHT: Copyright (c) 2014 Brendan Gregg. +# 12-Jul-2014 Brendan Gregg Created this. +# +# +# Implementation of mmcsnoop: +# 06-Jan-2021 Andreas Klinger + +### default variables +sname=mmcsnoop +tracing=/sys/kernel/debug/tracing +flock=/var/tmp/.$sname-lock +bufsize_kb=4096 +opt_duration=0; duration=; opt_name=0; name=; opt_pid=0; pid=; ftext= +opt_device=0; device=; +trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section + +function usage { + cat <<-END >&2 + USAGE: $sname [-h] [-d device] [-p PID] [-n name] [duration] + -d device # device string (eg, "202,1) + -n name # process name to match on I/O issue + -p PID # PID to match on I/O issue + -h # this usage message + duration # duration seconds, and use buffers + eg, + $sname # watch mmc I/O live (unbuffered) + $sname 1 # trace 1 sec (buffered) + $sname -p 91 # show I/O issued when PID 91 is on-CPU + + See the man page and example file for more info. +END + exit +} + +function warn { + if ! eval "$@"; then + echo >&2 "WARNING: command failed \"$@\"" + fi +} + +function end { + # disable tracing + echo 2>/dev/null + echo "Ending tracing..." 2>/dev/null + cd $tracing + [[ -d instances ]] && cd instances && cd $sname + warn "echo 0 > events/mmc/mmc_request_start/enable" + warn "echo 0 > events/mmc/mmc_request_done/enable" + if (( opt_device || opt_pid )); then + warn "echo 0 > events/mmc/mmc_request_start/filter" + warn "echo 0 > events/mmc/mmc_request_done/filter" + fi + warn "echo > trace" + + cd $tracing + [[ -d instances ]] && cd instances && rmdir $sname + + (( wroteflock )) && warn "rm $flock" +} + +function die { + echo >&2 "$@" + exit 1 +} + +function edie { + # die with a quiet end() + echo >&2 "$@" + exec >/dev/null 2>&1 + end + exit 1 +} + +function use_instances { + cd instances + [[ -d $sname ]] || mkdir $sname + cd $sname +} + +### process options +while getopts d:hi:n:p:Qst opt +do + case $opt in + d) opt_device=1; device=$OPTARG ;; + n) opt_name=1; name=$OPTARG ;; + p) opt_pid=1; pid=$OPTARG ;; + h|?) usage ;; + esac +done +shift $(( $OPTIND - 1 )) +if (( $# )); then + opt_duration=1 + duration=$1 + shift +fi +if (( opt_device )); then + major=${device%,*} + minor=${device#*,} + dev=$(( (major << 20) + minor )) +fi + +### option logic +(( opt_pid && opt_name )) && die "ERROR: use either -p or -n." +(( opt_pid )) && ftext=" issued by PID $pid" +(( opt_name )) && ftext=" issued by process name \"$name\"" +if (( opt_duration )); then + echo "Tracing mmc I/O $ftext for $duration seconds (buffered)..." +else + echo "Tracing mmc I/O $ftext. Ctrl-C to end." +fi + +### select awk +(( opt_duration )) && use=mawk || use=gawk # workaround for mawk fflush() +[[ -x /usr/bin/$use ]] && awk=$use || awk=awk +wroteflock=1 + +### check permissions +cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE? + debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)" + +[[ -d instances ]] && use_instances + +# for comparison with dmesg we use global clock +warn "echo global > trace_clock" + +### ftrace lock +[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock" +echo $$ > $flock || die "ERROR: unable to write $flock." +wroteflock=1 + +### setup and begin tracing +warn "echo nop > current_tracer" +warn "echo $bufsize_kb > buffer_size_kb" +filter= +if (( opt_device )); then + [[ "$filter" != "" ]] && filter="$filter && " + filter="${filter}dev == $dev" +fi +filter_i=$filter +if (( opt_pid )); then + [[ "$filter_i" != "" ]] && filter_i="$filter_i && " + filter_i="${filter_i}common_pid == $pid" + [[ "$filter" == "" ]] && filter=0 +fi +if (( opt_device || opt_pid )); then + if ! echo "$filter_i" > events/mmc/mmc_request_start/filter || \ + ! echo "$filter" > events/mmc/mmc_request_done/filter + then + die "ERROR: setting -d or -t filter. Exiting." + fi +fi +if ! echo 1 > events/mmc/mmc_request_start/enable || \ + ! echo 1 > events/mmc/mmc_request_done/enable; then + die "ERROR: enabling mmc I/O tracepoints. Exiting." +fi +printf "%-14.14s %6s %6s %-18s %4s %10s %10s %10s %8s %8s %14s %14s\n" \ + "COMM" "PID" "DEV" "MRQ" "OPCD" "CMD_ARG" "CMD_FLAGS" \ + "CMD_ERR" "BYTES" "LAT[us]" "START-TS" "END-TS" + +# +# Determine output format. It may be one of the following (newest first): +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# TASK-PID CPU# TIMESTAMP FUNCTION +# To differentiate between them, the number of header fields is counted, +# and an offset set, to skip the extra column when needed. +# +offset=$($awk 'BEGIN { o = 0; } + $1 == "#" && $2 ~ /TASK/ && NF == 6 { o = 1; } + $2 ~ /TASK/ { print o; exit }' trace) + +### print trace buffer +warn "echo > trace" +( if (( opt_duration )); then + # wait then dump buffer + sleep $duration + cat trace +else + # print buffer live + cat trace_pipe +fi ) | $awk -v o=$offset -v opt_name=$opt_name -v name=$name \ + -v opt_duration=$opt_duration ' + + # common fields + $1 != "#" { + # task name can contain dashes + comm = pid = $1 + sub(/-[0-9][0-9]*/, "", comm) + sub(/.*-/, "", pid) + time = $(3+o); sub(":", "", time) + dev = $(5+o) + sub(/:/, "", dev) + + sstr = "struct mmc_request[" + mrq0 = substr($0, index($0, sstr) + length(sstr)) + mrq = substr(mrq0, 0, index(mrq0, "]") - 1) + + opcode = substr($0, index($0, "opcode=") + 7) + sub(/ .*/, "", opcode) + } + + # mmc request start + $1 != "#" && $0 ~ /mmc_request_start/ { + if (opt_name && match(comm, name) == 0) + next + + cmd_arg = substr($0, index($0, "cmd_arg=") + 8) + sub(/ .*/, "", cmd_arg) + cmd_args[mrq] = cmd_arg + + cmd_flag = substr($0, index($0, "cmd_flags=") + 10) + sub(/ .*/, "", cmd_flag) + cmd_flags[mrq] = cmd_flag + + stime[mrq] = time + comms[mrq] = comm + pids[mrq] = pid + next + } + + # mmc request done + $1 != "#" && $0 ~ /mmc_request_done/ { + if (stime[mrq] > 0) { + etime[mrq] = time + latency = (etime[mrq] - stime[mrq]) * 1000000 + + cmd_err = substr($0, index($0, "cmd_err=") + 8) + sub(/ .*/, "", cmd_err) + + bytes_xfered = substr($0, index($0, "bytes_xfered=") + 13) + sub(/ .*/, "", bytes_xfered) + + printf "%-14.14s %6s %6s %-18s %4d %10s %10s %10s %8d %8d %14.6f %14.6f\n", + comms[mrq], pids[mrq], dev, mrq, opcode, + cmd_args[mrq], cmd_flags[mrq], cmd_err, + bytes_xfered, latency, stime[mrq], etime[mrq] + if (!opt_duration) + fflush() + + delete comms[mrq] + delete pids[mrq] + delete stime[mrq] + delete etime[mrq] + delete cmd_args[mrq] + delete cmd_flags[mrq] + } + next + } + + $0 ~ /LOST.*EVENTS/ { print "WARNING: " $0 > "/dev/stderr" } +' + +### end tracing +end + +# tracer: nop +# +# _-----=> irqs-off +# / _----=> need-resched +# | / _---=> hardirq/softirq +# || / _--=> preempt-depth +# ||| / delay +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# | | | |||| | | +# kworker/2:1H-2100 [002] .... 615.658880: mmc_request_start: mmc1: start struct mmc_request[0000000097276643]: cmd_opcode=25 cmd_arg=0x574940 cmd_flags=0xb5 cmd_retries=0 stop_opcode=12 stop_arg=0x0 stop_flags=0x49d stop_retries=0 sbc_opcode=23 sbc_arg=0x320 sbc_flags=0x15 sbc_retires=0 blocks=800 block_size=512 blk_addr=5720384 data_flags=0x100 tag=1 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=1 retune_period=0 +# -0 [000] ..s1 615.690018: mmc_request_done: mmc1: end struct mmc_request[0000000097276643]: cmd_opcode=25 cmd_err=0 cmd_resp=0x900 0x0 0x0 0x0 cmd_retries=0 stop_opcode=12 stop_err=0 stop_resp=0x0 0x0 0x0 0x0 stop_retries=0 sbc_opcode=23 sbc_err=0 sbc_resp=0x0 0x0 0x0 0x0 sbc_retries=0 bytes_xfered=409600 data_err=0 tag=1 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=1 retune_period=0 +# kworker/0:2H-1973 [000] .... 615.690068: mmc_request_start: mmc1: start struct mmc_request[00000000ce997144]: cmd_opcode=13 cmd_arg=0x70000 cmd_flags=0x195 cmd_retries=5 stop_opcode=0 stop_arg=0x0 stop_flags=0x0 stop_retries=0 sbc_opcode=0 sbc_arg=0x0 sbc_flags=0x0 sbc_retires=0 blocks=0 block_size=0 blk_addr=0 data_flags=0x0 tag=0 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=2 retune_period=0 +# kworker/0:2H-1973 [000] ..s. 615.690084: mmc_request_done: mmc1: end struct mmc_request[00000000ce997144]: cmd_opcode=13 cmd_err=0 cmd_resp=0x900 0x0 0x0 0x0 cmd_retries=5 stop_opcode=0 stop_err=0 stop_resp=0x0 0x0 0x0 0x0 stop_retries=0 sbc_opcode=0 sbc_err=0 sbc_resp=0x0 0x0 0x0 0x0 sbc_retries=0 bytes_xfered=0 data_err=0 tag=0 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=2 retune_period=0 +# kworker/2:1H-2100 [002] .... 615.690579: mmc_request_start: mmc1: start struct mmc_request[0000000097276643]: cmd_opcode=24 cmd_arg=0x4000 cmd_flags=0xb5 cmd_retries=0 stop_opcode=0 stop_arg=0x0 stop_flags=0x0 stop_retries=0 sbc_opcode=0 sbc_arg=0x0 sbc_flags=0x0 sbc_retires=0 blocks=1 block_size=512 blk_addr=16384 data_flags=0x100 tag=1 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=1 retune_period=0 +# -0 [000] ..s1 615.693426: mmc_request_done: mmc1: end struct mmc_request[0000000097276643]: cmd_opcode=24 cmd_err=0 cmd_resp=0x900 0x0 0x0 0x0 cmd_retries=0 stop_opcode=0 stop_err=0 stop_resp=0x0 0x0 0x0 0x0 stop_retries=0 sbc_opcode=0 sbc_err=0 sbc_resp=0x0 0x0 0x0 0x0 sbc_retries=0 bytes_xfered=512 data_err=0 tag=1 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=1 retune_period=0 +# kworker/0:2H-1973 [000] .... 615.693463: mmc_request_start: mmc1: start struct mmc_request[00000000ce997144]: cmd_opcode=13 cmd_arg=0x70000 cmd_flags=0x195 cmd_retries=5 stop_opcode=0 stop_arg=0x0 stop_flags=0x0 stop_retries=0 sbc_opcode=0 sbc_arg=0x0 sbc_flags=0x0 sbc_retires=0 blocks=0 block_size=0 blk_addr=0 data_flags=0x0 tag=0 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=2 retune_period=0 +# kworker/0:2H-1973 [000] ..s1 615.693479: mmc_request_done: mmc1: end struct mmc_request[00000000ce997144]: cmd_opcode=13 cmd_err=0 cmd_resp=0x900 0x0 0x0 0x0 cmd_retries=5 stop_opcode=0 stop_err=0 stop_resp=0x0 0x0 0x0 0x0 stop_retries=0 sbc_opcode=0 sbc_err=0 sbc_resp=0x0 0x0 0x0 0x0 sbc_retries=0 bytes_xfered=0 data_err=0 tag=0 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=2 retune_period=0 +# kworker/2:1H-2100 [002] .... 615.693646: mmc_request_start: mmc1: start struct mmc_request[0000000097276643]: cmd_opcode=25 cmd_arg=0x33e1 cmd_flags=0xb5 cmd_retries=0 stop_opcode=12 stop_arg=0x0 stop_flags=0x49d stop_retries=0 sbc_opcode=23 sbc_arg=0x2 sbc_flags=0x15 sbc_retires=0 blocks=2 block_size=512 blk_addr=13281 data_flags=0x100 tag=1 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=1 retune_period=0 +# -0 [000] ..s1 615.695614: mmc_request_done: mmc1: end struct mmc_request[0000000097276643]: cmd_opcode=25 cmd_err=0 cmd_resp=0x900 0x0 0x0 0x0 cmd_retries=0 stop_opcode=12 stop_err=0 stop_resp=0x0 0x0 0x0 0x0 stop_retries=0 sbc_opcode=23 sbc_err=0 sbc_resp=0x0 0x0 0x0 0x0 sbc_retries=0 bytes_xfered=1024 data_err=0 tag=1 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=1 retune_period=0 +# kworker/2:1H-2100 [002] .... 615.695652: mmc_request_start: mmc1: start struct mmc_request[000000005c6a0141]: cmd_opcode=13 cmd_arg=0x70000 cmd_flags=0x195 cmd_retries=5 stop_opcode=0 stop_arg=0x0 stop_flags=0x0 stop_retries=0 sbc_opcode=0 sbc_arg=0x0 sbc_flags=0x0 sbc_retires=0 blocks=0 block_size=0 blk_addr=0 data_flags=0x0 tag=0 can_retune=1 doing_retune=0 retune_now=0 need_retune=0 hold_retune=2 retune_period=0