Re: [PATCH v11 1/5] arm64: Call start_backtrace() only from within walk_stackframe()

From: Mark Rutland
Date: Tue Nov 30 2021 - 13:29:37 EST


On Tue, Nov 30, 2021 at 11:13:28AM -0600, Madhavan T. Venkataraman wrote:
> On 11/30/21 9:05 AM, Mark Rutland wrote:
> > On Tue, Nov 23, 2021 at 01:37:19PM -0600, madvenka@xxxxxxxxxxxxxxxxxxx wrote:
> >> From: "Madhavan T. Venkataraman" <madvenka@xxxxxxxxxxxxxxxxxxx>
> >>
> >> Currently, arch_stack_walk() calls start_backtrace() and walk_stackframe()
> >> separately. There is no need to do that. Instead, call start_backtrace()
> >> from within walk_stackframe(). In other words, walk_stackframe() is the only
> >> unwind function a consumer needs to call.

> >> @@ -143,15 +140,19 @@ static int notrace unwind_frame(struct task_struct *tsk,
> >> NOKPROBE_SYMBOL(unwind_frame);
> >>
> >> static void notrace walk_stackframe(struct task_struct *tsk,
> >> - struct stackframe *frame,
> >> + unsigned long fp, unsigned long pc,
> >> bool (*fn)(void *, unsigned long), void *data)
> >> {
> >> + struct stackframe frame;
> >> +
> >> + start_backtrace(&frame, fp, pc);
> >> +
> >> while (1) {
> >> int ret;
> >>
> >> - if (!fn(data, frame->pc))
> >> + if (!fn(data, frame.pc))
> >> break;
> >> - ret = unwind_frame(tsk, frame);
> >> + ret = unwind_frame(tsk, &frame);
> >> if (ret < 0)
> >> break;
> >> }
> >> @@ -195,17 +196,19 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
> >> void *cookie, struct task_struct *task,
> >> struct pt_regs *regs)
> >> {
> >> - struct stackframe frame;
> >> -
> >> - if (regs)
> >> - start_backtrace(&frame, regs->regs[29], regs->pc);
> >> - else if (task == current)
> >> - start_backtrace(&frame,
> >> - (unsigned long)__builtin_frame_address(1),
> >> - (unsigned long)__builtin_return_address(0));
> >> - else
> >> - start_backtrace(&frame, thread_saved_fp(task),
> >> - thread_saved_pc(task));
> >> -
> >> - walk_stackframe(task, &frame, consume_entry, cookie);
> >> + unsigned long fp, pc;
> >> +
> >> + if (regs) {
> >> + fp = regs->regs[29];
> >> + pc = regs->pc;
> >> + } else if (task == current) {
> >> + /* Skip arch_stack_walk() in the stack trace. */
> >> + fp = (unsigned long)__builtin_frame_address(1);
> >> + pc = (unsigned long)__builtin_return_address(0);
> >> + } else {
> >> + /* Caller guarantees that the task is not running. */
> >> + fp = thread_saved_fp(task);
> >> + pc = thread_saved_pc(task);
> >> + }
> >> + walk_stackframe(task, fp, pc, consume_entry, cookie);
> >
> > I'd prefer to leave this as-is. The new and old structure are largely
> > equivalent, so we haven't made this any simpler, but we have added more
> > arguments to walk_stackframe().
> >
>
> This is just to simplify things when we eventually add arch_stack_walk_reliable().
> That is all. All of the unwinding is done by a single unwinding function and
> there are two consumers of that unwinding function - arch_stack_walk() and
> arch_stack_walk_reliable().

I understand the theory, but I don't think that moving the start_backtrace()
call actually simplifies this in a meaningful way, and I think it'll make it
harder for us to make more meaningful simplifications later on.

As of patch 4 of this series, we'll have:

| noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
| void *cookie, struct task_struct *task,
| struct pt_regs *regs)
| {
| unsigned long fp, pc;
|
| if (regs) {
| fp = regs->regs[29];
| pc = regs->pc;
| } else if (task == current) {
| /* Skip arch_stack_walk() in the stack trace. */
| fp = (unsigned long)__builtin_frame_address(1);
| pc = (unsigned long)__builtin_return_address(0);
| } else {
| /* Caller guarantees that the task is not running. */
| fp = thread_saved_fp(task);
| pc = thread_saved_pc(task);
| }
| walk_stackframe(task, fp, pc, consume_entry, cookie);
| }
|
| noinline int notrace arch_stack_walk_reliable(stack_trace_consume_fn consume_fn,
| void *cookie,
| struct task_struct *task)
| {
| unsigned long fp, pc;
|
| if (task == current) {
| /* Skip arch_stack_walk_reliable() in the stack trace. */
| fp = (unsigned long)__builtin_frame_address(1);
| pc = (unsigned long)__builtin_return_address(0);
| } else {
| /* Caller guarantees that the task is not running. */
| fp = thread_saved_fp(task);
| pc = thread_saved_pc(task);
| }
| if (unwind(task, fp, pc, consume_fn, cookie))
| return 0;
| return -EINVAL;
| }

Which I do not think is substantially simpler than the naive extrapolation from
what we currently have, e.g.

| noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
| void *cookie, struct task_struct *task,
| struct pt_regs *regs)
| {
| struct stackframe frame;
|
| if (regs) {
| unwind_init(&frame, regs->regs[29], regs->pc);
| } else if (task == current) {
| unwind_init(&frame, __builtin_frame_address(1),
| __builtin_return_address(0));
| } else {
| unwind_init(&frame, thread_saved_fp(task),
| thread_saved_pc(task));
| }
| walk_stackframe(task, &frame, consume_entry, cookie);
| }
|
| noinline int notrace arch_stack_walk_reliable(stack_trace_consume_fn consume_fn,
| void *cookie,
| struct task_struct *task)
| {
| struct stackframe frame;
|
| if (task == current) {
| unwind_init(&frame, __builtin_frame_address(1),
| __builtin_return_address(0));
| } else {
| unwind_init(&frame, thread_saved_fp(task),
| thread_saved_pc(task));
| }
| if (unwind(task, &frame, consume_fn, cookie))
| return 0;
| return -EINVAL;
| }

Further, I think we can factor this in a different way to reduce the
duplication:

| /*
| * TODO: document requirements here
| */
| static inline void unwind_init_from_current_regs(struct stackframe *frame,
| struct pt_regs *regs)
| {
| unwind_init(frame, regs->regs[29], regs->pc);
| }
|
| /*
| * TODO: document requirements here
| */
| static inline void unwind_init_from_blocked_task(struct stackframe *frame,
| struct task_struct *tsk)
| {
| unwind_init(frame, thread_saved_fp(tsk),
| thread_saved_pc(tsk));
| }
|
| /*
| * TODO: document requirements here
| *
| * Note: this is always inlined, and we expect our caller to be a noinline
| * function, such that this starts from our caller's caller.
| */
| static __always_inline void unwind_init_from_caller(struct stackframe *frame)
| {
| unwind_init(frame, __builtin_frame_address(1),
| __builtin_return_address(0));
| }
|
| noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
| void *cookie, struct task_struct *task,
| struct pt_regs *regs)
| {
| struct stackframe frame;
|
| if (regs)
| unwind_init_from_current_regs(&frame, regs);
| else if (task == current)
| unwind_init_from_caller(&frame);
| else
| unwind_init_from_blocked_task(&frame, task);
|
| unwind(task, &frame, consume_entry, cookie);
| }
|
| noinline int notrace arch_stack_walk_reliable(stack_trace_consume_fn consume_fn,
| void *cookie,
| struct task_struct *task)
| {
| struct stackframe frame;
|
| if (task == current)
| unwind_init_from_caller(&frame);
| else
| unwind_init_from_blocked_task(&frame, task);
|
| if (unwind(task, &frame, consume_fn, cookie))
| return 0;
| return -EINVAL;
| }

... which minimizes the duplication and allows us to add specialized
initialization for each case if necessary, which I believe we will need in
future to make unwinding across exception boundaries (such as when starting
with regs) more useful.

Thanks,
Mark.