From: "Madhavan T. Venkataraman" <madvenka@xxxxxxxxxxxxxxxxxxx>

Unwinder changes
================

Termination
===========

Currently, the unwinder terminates when both the FP (frame pointer)
and the PC (return address) of a frame are 0. But a frame could get
corrupted and zeroed. There needs to be a better check.

The following special terminating frame and function have been
defined for this purpose:

	const u64 arm64_last_frame[2] __attribute__ ((aligned (16)));

	void arm64_last_func(void)
	{
	}

So, set the FP to arm64_last_frame and the PC to arm64_last_func in
the bottommost frame.

Exception/Interrupt detection
=============================

An EL1 exception renders the stack trace unreliable as it can happen
anywhere including the frame pointer prolog and epilog. The unwinder
needs to be able to detect the exception on the stack.

Currently, the EL1 exception handler sets up pt_regs on the stack and
chains pt_regs->stackframe with the other frames on the stack. But the
unwinder does not know where this exception frame is in the stack
trace.

Set the LSB of the exception frame FP to allow the unwinder to detect
the exception frame. When the unwinder detects the frame, it needs to
make sure that it is really an exception frame and not the result of
any stack corruption.

It can do this if the FP and PC are also recorded elsewhere in the
pt_regs for comparison. Currently, the FP is also stored in
regs->regs[29]. The PC is stored in regs->pc. However, regs->pc can
be changed by lower level functions.

Create a new field, pt_regs->orig_pc, and record the return address
PC there. With this, the unwinder can validate the exception frame
and set a flag so that the caller of the unwinder can know when
an exception frame is encountered.

Unwinder return value
=====================

Currently, the unwinder returns -EINVAL for stack trace termination
as well as stack trace error. Return -ENOENT for stack trace
termination and -EINVAL for error to disambiguate.
This idea has been borrowed from Mark Brown. Reliable stack trace function ============================= Implement arch_stack_walk_reliable(). This function walks the stack like the existing stack trace functions with a couple of additional checks: Return address check -------------------- For each frame, check the return address to see if it is a proper kernel text address. If not, return -EINVAL. Exception frame check --------------------- Check each frame to see if it is an EL1 exception frame. If it is, return -EINVAL. Signed-off-by: Madhavan T. Venkataraman <madvenka@xxxxxxxxxxxxxxxxxxx> --- arch/arm64/include/asm/processor.h | 2 + arch/arm64/include/asm/ptrace.h | 7 ++ arch/arm64/include/asm/stacktrace.h | 5 ++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 14 +++- arch/arm64/kernel/head.S | 8 +-- arch/arm64/kernel/process.c | 12 ++++ arch/arm64/kernel/stacktrace.c | 103 +++++++++++++++++++++++++--- 8 files changed, 137 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ca2cd75d3286..d268c74d262e 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -195,6 +195,8 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) memset(regs, 0, sizeof(*regs)); forget_syscall(regs); regs->pc = pc; + regs->stackframe[0] = (u64) arm64_last_frame; + regs->stackframe[1] = (u64) arm64_last_func; if (system_uses_irq_prio_masking()) regs->pmr_save = GIC_PRIO_IRQON; diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index e58bca832dff..a15750a9f6e5 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -201,8 +201,15 @@ struct pt_regs { /* Only valid for some EL1 exceptions. */ u64 lockdep_hardirqs; u64 exit_rcu; + + /* Only valid for EL1 exceptions. 
*/ + u64 orig_pc; + u64 unused1; }; +extern const u64 arm64_last_frame[2]; +extern void arm64_last_func(void); + static inline bool in_syscall(struct pt_regs const *regs) { return regs->syscallno != NO_SYSCALL; diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index eb29b1fe8255..9760ceddbd78 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -49,6 +49,9 @@ struct stack_info { * * @graph: When FUNCTION_GRAPH_TRACER is selected, holds the index of a * replacement lr value in the ftrace graph stack. + * + * @exception_frame + * EL1 exception frame. */ struct stackframe { unsigned long fp; @@ -59,6 +62,7 @@ struct stackframe { #ifdef CONFIG_FUNCTION_GRAPH_TRACER int graph; #endif + bool exception_frame; }; extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); @@ -169,6 +173,7 @@ static inline void start_backtrace(struct stackframe *frame, bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); frame->prev_fp = 0; frame->prev_type = STACK_TYPE_UNKNOWN; + frame->exception_frame = false; } #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 301784463587..a9fbe1ca6d8a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -75,6 +75,7 @@ int main(void) DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); + DEFINE(S_ORIG_PC, offsetof(struct pt_regs, orig_pc)); DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c9bae73f2621..b2d6c73dd054 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -264,10 +264,21 @@ alternative_else_nop_endif * In order to be able to dump the contents of struct pt_regs at the * time the exception was taken (in case we 
attempt to walk the call * stack later), chain it together with the stack frames. + * + * Set up a synthetic EL0 frame such that the unwinder can recognize + * it and stop the unwind. + * + * Set up a synthetic EL1 frame such that the unwinder can recognize + * it. For a reliable stack trace, the unwinder stops here. Else, it + * continues. Also, record the return address in regs->orig_pc for + * the unwinder's benefit because regs->pc can be changed. */ .if \el == 0 - stp xzr, xzr, [sp, #S_STACKFRAME] + ldr x29, =arm64_last_frame + ldr x17, =arm64_last_func + stp x29, x17, [sp, #S_STACKFRAME] .else + orr x29, x29, #1 stp x29, x22, [sp, #S_STACKFRAME] .endif add x29, sp, #S_STACKFRAME @@ -279,6 +290,7 @@ alternative_else_nop_endif #endif stp x22, x23, [sp, #S_PC] + str x22, [sp, #S_ORIG_PC] /* Not in a syscall by default (el0_svc overwrites for real syscall) */ .if \el == 0 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a0dc987724ed..2cce019f29fa 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -448,8 +448,8 @@ SYM_FUNC_START_LOCAL(__primary_switched) 0: #endif add sp, sp, #16 - mov x29, #0 - mov x30, #0 + ldr x29, =arm64_last_frame + ldr x30, =arm64_last_func b start_kernel SYM_FUNC_END(__primary_switched) @@ -644,8 +644,8 @@ SYM_FUNC_START_LOCAL(__secondary_switched) cbz x2, __secondary_too_slow msr sp_el0, x2 scs_load x2, x3 - mov x29, #0 - mov x30, #0 + ldr x29, =arm64_last_frame + ldr x30, =arm64_last_func #ifdef CONFIG_ARM64_PTR_AUTH ptrauth_keys_init_cpu x2, x3, x4, x5 diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6616486a58fe..bac13fc33914 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -380,6 +380,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) asmlinkage void ret_from_fork(void) asm("ret_from_fork"); +const u64 arm64_last_frame[2] __attribute__ ((aligned (16))); + +void arm64_last_func(void) +{ +} + int 
copy_thread(unsigned long clone_flags, unsigned long stack_start, unsigned long stk_sz, struct task_struct *p, unsigned long tls) { @@ -437,6 +443,12 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, } p->thread.cpu_context.pc = (unsigned long)ret_from_fork; p->thread.cpu_context.sp = (unsigned long)childregs; + /* + * Set up a special termination stack frame for the task. + */ + p->thread.cpu_context.fp = (unsigned long)childregs->stackframe; + childregs->stackframe[0] = (u64) arm64_last_frame; + childregs->stackframe[1] = (u64) arm64_last_func; ptrace_hw_copy_thread(p); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index fa56af1a59c3..26ac4dd54eaf 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -18,6 +18,60 @@ #include <asm/stack_pointer.h> #include <asm/stacktrace.h> +static notrace struct pt_regs *get_frame_regs(struct task_struct *task, + struct stackframe *frame) +{ + unsigned long stackframe, regs_start, regs_end; + struct stack_info info; + + stackframe = frame->prev_fp; + if (!stackframe) + return NULL; + + (void) on_accessible_stack(task, stackframe, &info); + + regs_start = stackframe - offsetof(struct pt_regs, stackframe); + if (regs_start < info.low) + return NULL; + regs_end = regs_start + sizeof(struct pt_regs); + if (regs_end > info.high) + return NULL; + return (struct pt_regs *) regs_start; +} + +static notrace int update_frame(struct task_struct *task, + struct stackframe *frame) +{ + unsigned long lsb = frame->fp & 0xf; + unsigned long fp = frame->fp & ~lsb; + unsigned long pc = frame->pc; + struct pt_regs *regs; + + frame->exception_frame = false; + + if (fp == (unsigned long) arm64_last_frame && + pc == (unsigned long) arm64_last_func) + return -ENOENT; + + if (!lsb) + return 0; + if (lsb != 1) + return -EINVAL; + + /* + * This looks like an EL1 exception frame. + * Make sure the frame matches the EL1 pt_regs. 
+ */ + regs = get_frame_regs(task, frame); + if (!regs || fp != READ_ONCE_NOCHECK(regs->regs[29]) || + pc != regs->orig_pc) + return -EINVAL; + + frame->exception_frame = true; + frame->fp = fp; + return 0; +} + /* * AArch64 PCS assigns the frame pointer to x29. * @@ -104,16 +158,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->pc = ptrauth_strip_insn_pac(frame->pc); - /* - * Frames created upon entry from EL0 have NULL FP and PC values, so - * don't bother reporting these. Frames created by __noreturn functions - * might have a valid FP even if PC is bogus, so only terminate where - * both are NULL. - */ - if (!frame->fp && !frame->pc) - return -EINVAL; - - return 0; + return update_frame(tsk, frame); } NOKPROBE_SYMBOL(unwind_frame); @@ -217,4 +262,42 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, walk_stackframe(task, &frame, consume_entry, cookie); } +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) +{ + struct stackframe frame; + int ret = 0; + + if (task == current) { + start_backtrace(&frame, + (unsigned long)__builtin_frame_address(0), + (unsigned long)arch_stack_walk_reliable); + } else { + start_backtrace(&frame, thread_saved_fp(task), + thread_saved_pc(task)); + } + + while (!ret) { + /* + * If the task encountered an EL1 exception, the stack trace + * is unreliable. + */ + if (frame.exception_frame) + return -EINVAL; + + /* + * A NULL or invalid return address probably means there's + * some generated code which __kernel_text_address() doesn't + * know about. + */ + if (!__kernel_text_address(frame.pc)) + return -EINVAL; + if (!consume_entry(cookie, frame.pc)) + return -EINVAL; + ret = unwind_frame(task, &frame); + } + + return ret == -ENOENT ? 0 : -EINVAL; +} + #endif -- 2.25.1