[RFC PATCH v1 1/1] arm64: Unwinder enhancements for reliable stack trace

From: madvenka
Date: Tue Feb 23 2021 - 13:13:51 EST


From: "Madhavan T. Venkataraman" <madvenka@xxxxxxxxxxxxxxxxxxx>

Unwinder changes
================

Termination
===========

Currently, the unwinder terminates when both the FP (frame pointer)
and the PC (return address) of a frame are 0. But a corrupted frame
could also end up zeroed, so an all-zero frame cannot be told apart
from the real end of the stack. A more robust termination check is
needed.

The following special terminating frame and function have been
defined for this purpose:

const u64 arm64_last_frame[2] __attribute__ ((aligned (16)));

void arm64_last_func(void)
{
}

So, set the FP to arm64_last_frame and the PC to arm64_last_func in
the bottom-most frame. The unwinder treats the stack trace as complete
only when it sees this exact pair.

Exception/Interrupt detection
=============================

An EL1 exception renders the stack trace unreliable as it can happen
anywhere including the frame pointer prolog and epilog. The
unwinder needs to be able to detect the exception on the stack.

Currently, the EL1 exception handler sets up pt_regs on the stack
and chains pt_regs->stackframe with the other frames on the stack.
But, the unwinder does not know where this exception frame is in
the stack trace.

Set the LSB of the exception frame FP to allow the unwinder to
detect the exception frame. When the unwinder detects the frame,
it needs to make sure that it is really an exception frame and
not the result of any stack corruption.

It can do this if the FP and PC are also recorded elsewhere in the
pt_regs for comparison. Currently, the FP is also stored in
regs->regs[29]. The PC is stored in regs->pc. However, regs->pc can
be changed by lower-level functions, so it cannot be relied upon for
this comparison.

Create a new field, pt_regs->orig_pc, and record the return address
PC there. With this, the unwinder can validate the exception frame
and set a flag so that the caller of the unwinder can know when
an exception frame is encountered.

Unwinder return value
=====================

Currently, the unwinder returns -EINVAL for stack trace termination
as well as stack trace error. Return -ENOENT for stack trace
termination and -EINVAL for error to disambiguate. This idea has
been borrowed from Mark Brown.

Reliable stack trace function
=============================

Implement arch_stack_walk_reliable(). This function walks the stack like
the existing stack trace functions with a couple of additional checks:

Return address check
--------------------

For each frame, check the return address to see if it is a
proper kernel text address. If not, return -EINVAL.

Exception frame check
---------------------

Check each frame to see if it is an EL1 exception frame. If it is,
return -EINVAL.

Signed-off-by: Madhavan T. Venkataraman <madvenka@xxxxxxxxxxxxxxxxxxx>
---
arch/arm64/include/asm/processor.h | 2 +
arch/arm64/include/asm/ptrace.h | 7 ++
arch/arm64/include/asm/stacktrace.h | 5 ++
arch/arm64/kernel/asm-offsets.c | 1 +
arch/arm64/kernel/entry.S | 14 +++-
arch/arm64/kernel/head.S | 8 +--
arch/arm64/kernel/process.c | 12 ++++
arch/arm64/kernel/stacktrace.c | 103 +++++++++++++++++++++++++---
8 files changed, 137 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ca2cd75d3286..d268c74d262e 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -195,6 +195,8 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
memset(regs, 0, sizeof(*regs));
forget_syscall(regs);
regs->pc = pc;
+ regs->stackframe[0] = (u64) arm64_last_frame;
+ regs->stackframe[1] = (u64) arm64_last_func;

if (system_uses_irq_prio_masking())
regs->pmr_save = GIC_PRIO_IRQON;
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index e58bca832dff..a15750a9f6e5 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -201,8 +201,15 @@ struct pt_regs {
/* Only valid for some EL1 exceptions. */
u64 lockdep_hardirqs;
u64 exit_rcu;
+
+ /* Only valid for EL1 exceptions. */
+ u64 orig_pc;
+ u64 unused1;
};

+extern const u64 arm64_last_frame[2];
+extern void arm64_last_func(void);
+
static inline bool in_syscall(struct pt_regs const *regs)
{
return regs->syscallno != NO_SYSCALL;
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index eb29b1fe8255..9760ceddbd78 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -49,6 +49,9 @@ struct stack_info {
*
* @graph: When FUNCTION_GRAPH_TRACER is selected, holds the index of a
* replacement lr value in the ftrace graph stack.
+ *
+ * @exception_frame:
+ * True when the frame just unwound is a validated EL1 exception frame.
*/
struct stackframe {
unsigned long fp;
@@ -59,6 +62,7 @@ struct stackframe {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
int graph;
#endif
+ bool exception_frame;
};

extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
@@ -169,6 +173,7 @@ static inline void start_backtrace(struct stackframe *frame,
bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
frame->prev_fp = 0;
frame->prev_type = STACK_TYPE_UNKNOWN;
+ frame->exception_frame = false;
}

#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 301784463587..a9fbe1ca6d8a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -75,6 +75,7 @@ int main(void)
DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
+ DEFINE(S_ORIG_PC, offsetof(struct pt_regs, orig_pc));
DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs));
BLANK();
#ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c9bae73f2621..b2d6c73dd054 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -264,10 +264,21 @@ alternative_else_nop_endif
* In order to be able to dump the contents of struct pt_regs at the
* time the exception was taken (in case we attempt to walk the call
* stack later), chain it together with the stack frames.
+ *
+ * Set up a synthetic EL0 frame such that the unwinder can recognize
+ * it and stop the unwind.
+ *
+ * Set up a synthetic EL1 frame such that the unwinder can recognize
+ * it. For a reliable stack trace, the unwinder stops here. Else, it
+ * continues. Also, record the return address in regs->orig_pc for
+ * the unwinder's benefit because regs->pc can be changed.
*/
.if \el == 0
- stp xzr, xzr, [sp, #S_STACKFRAME]
+ ldr x29, =arm64_last_frame
+ ldr x17, =arm64_last_func
+ stp x29, x17, [sp, #S_STACKFRAME]
.else
+ orr x29, x29, #1
stp x29, x22, [sp, #S_STACKFRAME]
.endif
add x29, sp, #S_STACKFRAME
@@ -279,6 +290,7 @@ alternative_else_nop_endif
#endif

stp x22, x23, [sp, #S_PC]
+ str x22, [sp, #S_ORIG_PC]

/* Not in a syscall by default (el0_svc overwrites for real syscall) */
.if \el == 0
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a0dc987724ed..2cce019f29fa 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -448,8 +448,8 @@ SYM_FUNC_START_LOCAL(__primary_switched)
0:
#endif
add sp, sp, #16
- mov x29, #0
- mov x30, #0
+ ldr x29, =arm64_last_frame
+ ldr x30, =arm64_last_func
b start_kernel
SYM_FUNC_END(__primary_switched)

@@ -644,8 +644,8 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
cbz x2, __secondary_too_slow
msr sp_el0, x2
scs_load x2, x3
- mov x29, #0
- mov x30, #0
+ ldr x29, =arm64_last_frame
+ ldr x30, =arm64_last_func

#ifdef CONFIG_ARM64_PTR_AUTH
ptrauth_keys_init_cpu x2, x3, x4, x5
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6616486a58fe..bac13fc33914 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -380,6 +380,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)

asmlinkage void ret_from_fork(void) asm("ret_from_fork");

+/*
+ * Sentinel frame record used to mark the final frame of a stack trace.
+ * The unwinder compares a frame's FP against the address of this object.
+ */
+const u64 arm64_last_frame[2] __attribute__ ((aligned (16)));
+
+/*
+ * Sentinel return address paired with arm64_last_frame. The unwinder
+ * compares a frame's PC against the address of this function; the body
+ * is intentionally empty.
+ */
+void arm64_last_func(void)
+{
+}
+
int copy_thread(unsigned long clone_flags, unsigned long stack_start,
unsigned long stk_sz, struct task_struct *p, unsigned long tls)
{
@@ -437,6 +443,12 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
+ /*
+ * Set up a special termination stack frame for the task.
+ */
+ p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;
+ childregs->stackframe[0] = (u64) arm64_last_frame;
+ childregs->stackframe[1] = (u64) arm64_last_func;

ptrace_hw_copy_thread(p);

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index fa56af1a59c3..26ac4dd54eaf 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -18,6 +18,60 @@
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>

+/*
+ * get_frame_regs - find the pt_regs that would enclose the frame record at
+ * frame->prev_fp.
+ *
+ * frame->prev_fp is treated as the address of a pt_regs->stackframe member;
+ * the start of the enclosing pt_regs is found by backing up by that member's
+ * offset.
+ *
+ * Returns the candidate pt_regs pointer, or NULL if prev_fp is zero, if
+ * prev_fp is not on a stack accessible to @task, or if a full pt_regs would
+ * not fit within the bounds reported for that stack.
+ */
+static notrace struct pt_regs *get_frame_regs(struct task_struct *task,
+					      struct stackframe *frame)
+{
+	unsigned long stackframe, regs_start, regs_end;
+	struct stack_info info;
+
+	stackframe = frame->prev_fp;
+	if (!stackframe)
+		return NULL;
+
+	/*
+	 * Do not discard the result: info.low/info.high are read below, and
+	 * 'info' is presumably only filled in when the lookup succeeds
+	 * (NOTE(review): confirm against on_accessible_stack()).
+	 */
+	if (!on_accessible_stack(task, stackframe, &info))
+		return NULL;
+
+	/* The whole pt_regs must lie inside the stack that holds the record. */
+	regs_start = stackframe - offsetof(struct pt_regs, stackframe);
+	if (regs_start < info.low)
+		return NULL;
+	regs_end = regs_start + sizeof(struct pt_regs);
+	if (regs_end > info.high)
+		return NULL;
+	return (struct pt_regs *) regs_start;
+}
+
+/*
+ * update_frame - classify the frame record that was just loaded into @frame.
+ *
+ * The low four bits of frame->fp are treated as tag bits and masked off
+ * before any comparison. frame->exception_frame is always reset first.
+ *
+ * Returns:
+ *   -ENOENT  the (untagged) FP and the PC match arm64_last_frame and
+ *            arm64_last_func, i.e. this is the final frame;
+ *   0        an ordinary frame (no tag bits), or a tagged frame that was
+ *            validated as an EL1 exception frame, in which case
+ *            frame->exception_frame is set and frame->fp has the tag removed;
+ *   -EINVAL  the tag bits hold a value other than 0 or 1, or the tagged frame
+ *            does not match the enclosing pt_regs.
+ */
+static notrace int update_frame(struct task_struct *task,
+				struct stackframe *frame)
+{
+	unsigned long lsb = frame->fp & 0xf;
+	unsigned long fp = frame->fp & ~lsb;
+	unsigned long pc = frame->pc;
+	struct pt_regs *regs;
+
+	frame->exception_frame = false;
+
+	if (fp == (unsigned long) arm64_last_frame &&
+	    pc == (unsigned long) arm64_last_func)
+		return -ENOENT;
+
+	if (!lsb)
+		return 0;
+	if (lsb != 1)
+		return -EINVAL;
+
+	/*
+	 * This looks like an EL1 exception frame.
+	 * Make sure the frame matches the EL1 pt_regs. Both fields live in
+	 * the same on-stack pt_regs, so read them the same way.
+	 */
+	regs = get_frame_regs(task, frame);
+	if (!regs || fp != READ_ONCE_NOCHECK(regs->regs[29]) ||
+	    pc != READ_ONCE_NOCHECK(regs->orig_pc))
+		return -EINVAL;
+
+	frame->exception_frame = true;
+	frame->fp = fp;
+	return 0;
+}
+
/*
* AArch64 PCS assigns the frame pointer to x29.
*
@@ -104,16 +158,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)

frame->pc = ptrauth_strip_insn_pac(frame->pc);

- /*
- * Frames created upon entry from EL0 have NULL FP and PC values, so
- * don't bother reporting these. Frames created by __noreturn functions
- * might have a valid FP even if PC is bogus, so only terminate where
- * both are NULL.
- */
- if (!frame->fp && !frame->pc)
- return -EINVAL;
-
- return 0;
+ return update_frame(tsk, frame);
}
NOKPROBE_SYMBOL(unwind_frame);

@@ -217,4 +262,42 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
walk_stackframe(task, &frame, consume_entry, cookie);
}

+/*
+ * arch_stack_walk_reliable - walk @task's stack, failing on anything that
+ * makes the trace untrustworthy.
+ *
+ * Each frame's PC is passed to @consume_entry together with @cookie.
+ *
+ * Returns 0 only when the unwinder reports normal termination (-ENOENT).
+ * Returns -EINVAL if an EL1 exception frame is seen, if a return address is
+ * not a kernel text address, if @consume_entry refuses an entry, or if the
+ * unwinder reports any other error.
+ */
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+			     void *cookie, struct task_struct *task)
+{
+	struct stackframe frame;
+	unsigned long start_fp, start_pc;
+	int err;
+
+	/* Start from this function for current, else from the saved context. */
+	if (task == current) {
+		start_fp = (unsigned long)__builtin_frame_address(0);
+		start_pc = (unsigned long)arch_stack_walk_reliable;
+	} else {
+		start_fp = thread_saved_fp(task);
+		start_pc = thread_saved_pc(task);
+	}
+	start_backtrace(&frame, start_fp, start_pc);
+
+	for (;;) {
+		/* An EL1 exception on the stack makes the trace unreliable. */
+		if (frame.exception_frame)
+			return -EINVAL;
+
+		/*
+		 * A NULL or otherwise invalid return address most likely
+		 * points at generated code unknown to
+		 * __kernel_text_address().
+		 */
+		if (!__kernel_text_address(frame.pc))
+			return -EINVAL;
+
+		if (!consume_entry(cookie, frame.pc))
+			return -EINVAL;
+
+		err = unwind_frame(task, &frame);
+		if (err)
+			break;
+	}
+
+	/* Only a clean termination counts as a reliable trace. */
+	if (err == -ENOENT)
+		return 0;
+	return -EINVAL;
+}
+
#endif
--
2.25.1