[PATCH 3/4] tracing/histogram: Fix stacktrace key

From: Tom Zanussi
Date: Fri Feb 10 2023 - 16:33:29 EST


The current code will always use the current stacktrace as a key even
if a stacktrace contained in a specific event field was specified.

For example, we expect to use the 'unsigned long[] stack' field in the
below event in the histogram:

# echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > /sys/kernel/debug/tracing/dynamic_events
# echo 'hist:keys=delta.buckets=100,stack.stacktrace:sort=delta' > /sys/kernel/debug/tracing/events/synthetic/block_lat/trigger

But in fact, when we type out the trigger, we see that it's using the
plain old global 'stacktrace' as the key, which is just the stacktrace
when the event was hit and not the stacktrace contained in the event,
which is what we want:

# cat /sys/kernel/debug/tracing/events/synthetic/block_lat/trigger
hist:keys=delta.buckets=100,stacktrace:vals=hitcount:sort=delta.buckets=100:size=2048 [active]

And in fact, there's no code to actually retrieve it from the event,
so we need to add HIST_FIELD_FN_STACK and hist_field_stack() to get it
and hook it into the trigger code. For now, since the stack is just
using dynamic strings, this could just use the dynamic string
function, but it seems cleaner to have a dedicated function an be able
to tweak independently as necessary.

Signed-off-by: Tom Zanussi <zanussi@xxxxxxxxxx>
---
kernel/trace/trace_events_hist.c | 34 +++++++++++++++++++++++++++++---
1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index a58380702491..eb812cfdaa88 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -135,6 +135,7 @@ enum hist_field_fn {
HIST_FIELD_FN_DIV_NOT_POWER2,
HIST_FIELD_FN_DIV_MULT_SHIFT,
HIST_FIELD_FN_EXECNAME,
+ HIST_FIELD_FN_STACK,
};

/*
@@ -1982,7 +1983,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
}

if (flags & HIST_FIELD_FL_STACKTRACE) {
- hist_field->fn_num = HIST_FIELD_FN_NOP;
+ if (field)
+ hist_field->fn_num = HIST_FIELD_FN_STACK;
+ else
+ hist_field->fn_num = HIST_FIELD_FN_NOP;
hist_field->size = HIST_STACKTRACE_SIZE;
hist_field->type = kstrdup_const("unsigned long[]", GFP_KERNEL);
if (!hist_field->type)
@@ -4269,6 +4273,19 @@ static u64 hist_field_execname(struct hist_field *hist_field,
return (u64)(unsigned long)(elt_data->comm);
}

+static u64 hist_field_stack(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ u32 str_item = *(u32 *)(event + hist_field->field->offset);
+ int str_loc = str_item & 0xffff;
+ char *addr = (char *)(event + str_loc);
+
+ return (u64)(unsigned long)addr;
+}
+
static u64 hist_fn_call(struct hist_field *hist_field,
struct tracing_map_elt *elt,
struct trace_buffer *buffer,
@@ -4332,6 +4349,8 @@ static u64 hist_fn_call(struct hist_field *hist_field,
return div_by_mult_and_shift(hist_field, elt, buffer, rbe, event);
case HIST_FIELD_FN_EXECNAME:
return hist_field_execname(hist_field, elt, buffer, rbe, event);
+ case HIST_FIELD_FN_STACK:
+ return hist_field_stack(hist_field, elt, buffer, rbe, event);
default:
return 0;
}
@@ -5233,8 +5252,17 @@ static void event_hist_trigger(struct event_trigger_data *data,

if (key_field->flags & HIST_FIELD_FL_STACKTRACE) {
memset(entries, 0, HIST_STACKTRACE_SIZE);
- stack_trace_save(entries, HIST_STACKTRACE_DEPTH,
- HIST_STACKTRACE_SKIP);
+ if (key_field->field) {
+ unsigned long *stack, n_entries;
+
+ field_contents = hist_fn_call(key_field, elt, buffer, rbe, rec);
+ stack = (unsigned long *)field_contents;
+ n_entries = *stack;
+ memcpy(entries, ++stack, n_entries * sizeof(unsigned long));
+ } else {
+ stack_trace_save(entries, HIST_STACKTRACE_DEPTH,
+ HIST_STACKTRACE_SKIP);
+ }
key = entries;
} else {
field_contents = hist_fn_call(key_field, elt, buffer, rbe, rec);
--
2.34.1