[PATCH v16 10/10] perf test: Add direct off-cpu tests

From: Howard Chu
Date: Wed Apr 30 2025 - 22:30:51 EST


Since we added --off-cpu-thresh, add tests for when a sample's off-cpu
time is above the threshold, and when it's below the threshold.

Note that the basic test performed in test_offcpu_basic() now collects a
direct sample: sleep 1 has a duration of 1000ms, which is higher than the
default --off-cpu-thresh value of 500ms, so the off-cpu time is reported
as a direct sample.

An example:

$ sudo perf test offcpu
124: perf record offcpu profiling tests : Ok

Tested-by: Gautam Menghani <gautam@xxxxxxxxxxxxx>
Acked-by: Namhyung Kim <namhyung@xxxxxxxxxx>
Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>
Suggested-by: Namhyung Kim <namhyung@xxxxxxxxxx>
Signed-off-by: Howard Chu <howardchu95@xxxxxxxxx>
---
tools/perf/tests/shell/record_offcpu.sh | 71 +++++++++++++++++++++++++
1 file changed, 71 insertions(+)

diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh
index 678947fe69ee..21a22efe08f5 100755
--- a/tools/perf/tests/shell/record_offcpu.sh
+++ b/tools/perf/tests/shell/record_offcpu.sh
@@ -7,6 +7,9 @@ set -e
err=0
perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)

+ts=$(printf "%u" $((~0 << 32))) # OFF_CPU_TIMESTAMP
+dummy_timestamp=${ts%???} # remove the last 3 digits to match perf script
+
cleanup() {
rm -f ${perfdata}
rm -f ${perfdata}.old
@@ -19,6 +22,9 @@ trap_cleanup() {
}
trap trap_cleanup EXIT TERM INT

+test_above_thresh="Threshold test (above threshold)"
+test_below_thresh="Threshold test (below threshold)"
+
test_offcpu_priv() {
echo "Checking off-cpu privilege"

@@ -88,6 +94,63 @@ test_offcpu_child() {
echo "Child task off-cpu test [Success]"
}

+# task blocks longer than the --off-cpu-thresh, perf should collect a direct sample
+test_offcpu_above_thresh() {
+ echo "${test_above_thresh}"
+
+ # record sleep 1 with --off-cpu-thresh 999 (ms): the sleep blocks longer than that, so a direct sample is expected
+ if ! perf record -e dummy --off-cpu --off-cpu-thresh 999 -o ${perfdata} -- sleep 1 2> /dev/null
+ then
+ echo "${test_above_thresh} [Failed record]"
+ err=1
+ return
+ fi
+ # a direct sample's timestamp should be lower than the dummy_timestamp used by the at-the-end sample,
+ # so limit perf script to the window ending at dummy_timestamp and check that an offcpu-time event exists
+ if ! perf script --time "0, ${dummy_timestamp}" -i ${perfdata} -F event | grep -q "offcpu-time"
+ then
+ echo "${test_above_thresh} [Failed missing direct samples]"
+ err=1
+ return
+ fi
+ # only one direct sample is expected: awk exits on the first line, passing only if its period is above 999000000 (999ms in ns)
+ if ! perf script --time "0, ${dummy_timestamp}" -i ${perfdata} -F period | \
+ awk '{ if (int($1) > 999000000) exit 0; else exit 1; }'
+ then
+ echo "${test_above_thresh} [Failed off-cpu time too short]"
+ err=1
+ return
+ fi
+ echo "${test_above_thresh} [Success]"
+}
+
+# task blocks shorter than the --off-cpu-thresh, perf should collect an at-the-end sample
+test_offcpu_below_thresh() {
+ echo "${test_below_thresh}"
+
+ # direct samples are only collected for tasks blocked for more than 1.2s; sleep 1 is shorter, so none are expected
+ if ! perf record -e dummy --off-cpu --off-cpu-thresh 1200 -o ${perfdata} -- sleep 1 2> /dev/null
+ then
+ echo "${test_below_thresh} [Failed record]"
+ err=1
+ return
+ fi
+ # an at-the-end sample should show up in the time window starting at dummy_timestamp
+ if ! perf script --time "${dummy_timestamp}," -i ${perfdata} -F event | grep -q 'offcpu-time'
+ then
+ echo "${test_below_thresh} [Failed at-the-end samples cannot be found]"
+ err=1
+ return
+ fi
+ # and there must be no offcpu-time event before dummy_timestamp, i.e. no direct samples
+ if perf script --time "0, ${dummy_timestamp}" -i ${perfdata} -F event | grep -q 'offcpu-time'
+ then
+ echo "${test_below_thresh} [Failed direct samples are found when they shouldn't be]"
+ err=1
+ return
+ fi
+ echo "${test_below_thresh} [Success]"
+}

test_offcpu_priv

@@ -99,5 +162,13 @@ if [ $err = 0 ]; then
test_offcpu_child
fi

+if [ $err = 0 ]; then
+ test_offcpu_above_thresh
+fi
+
+if [ $err = 0 ]; then
+ test_offcpu_below_thresh
+fi
+
cleanup
exit $err
--
2.45.2