On Wed, Sep 15, 2021 at 09:51:57AM +0800, 王贇 wrote: > > + > > + if (in_exception_stack_guard((void *)address)) > > + pr_emerg("PANIC: exception stack guard: 0x%lx\n", address); > > #endif > > > > pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code); > > > > The panic triggered as below after the stack size recovered, I found this info > could be helpful, maybe we should keep it? Could you please test this? --- Subject: x86/dumpstack/64: Add guard pages to stack_info From: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Date: Wed Sep 15 17:12:59 CEST 2021 Explicitly add the exception stack guard pages to stack_info and report on them from #DF. Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> --- arch/x86/include/asm/cpu_entry_area.h | 3 +++ arch/x86/include/asm/stacktrace.h | 3 ++- arch/x86/kernel/dumpstack_64.c | 17 ++++++++++++++++- arch/x86/kernel/traps.c | 17 ++++++++++++++++- 4 files changed, 37 insertions(+), 3 deletions(-) --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -61,6 +61,9 @@ enum exception_stack_ordering { #define CEA_ESTACK_OFFS(st) \ offsetof(struct cea_exception_stacks, st## _stack) +#define CEA_EGUARD_OFFS(st) \ + offsetof(struct cea_exception_stacks, st## _stack_guard) + #define CEA_ESTACK_PAGES \ (sizeof(struct cea_exception_stacks) / PAGE_SIZE) --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -14,13 +14,14 @@ #include <asm/switch_to.h> enum stack_type { - STACK_TYPE_UNKNOWN, + STACK_TYPE_UNKNOWN = 0, STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, STACK_TYPE_ENTRY, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, + STACK_TYPE_GUARD = 0x80, }; struct stack_info { --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -32,9 +32,15 @@ const char *stack_type_name(enum stack_t { BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); + if (type == STACK_TYPE_TASK) + return "TASK"; + if (type == STACK_TYPE_IRQ) return "IRQ"; + if (type == STACK_TYPE_SOFTIRQ) + return "SOFTIRQ"; + if (type == STACK_TYPE_ENTRY) { /* * On 64-bit, we have a generic entry stack that we @@ -63,6 +69,11 @@ struct estack_pages { }; #define EPAGERANGE(st) \ + [PFN_DOWN(CEA_EGUARD_OFFS(st))] = { \ + .offs = CEA_EGUARD_OFFS(st), \ + .size = PAGE_SIZE, \ + .type = STACK_TYPE_GUARD + \ + STACK_TYPE_EXCEPTION + ESTACK_ ##st, }, \ [PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \ PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \ .offs = CEA_ESTACK_OFFS(st), \ @@ -111,10 +122,11 @@ static __always_inline bool in_exception k = (stk - begin) >> PAGE_SHIFT; /* Lookup the page descriptor */ ep = &estack_pages[k]; - /* Guard page? */ + /* unknown entry */ if (!ep->size) return false; + begin += (unsigned long)ep->offs; end = begin + (unsigned long)ep->size; regs = (struct pt_regs *)end - 1; @@ -193,6 +205,9 @@ int get_stack_info(unsigned long *stack, if (!get_stack_info_noinstr(stack, task, info)) goto unknown; + if (info->type & STACK_TYPE_GUARD) + goto unknown; + /* * Make sure we don't iterate through any given stack more than once. * If it comes up a second time then there's something wrong going on: --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -461,6 +461,19 @@ DEFINE_IDTENTRY_DF(exc_double_fault) } #endif +#ifdef CONFIG_X86_64 + { + struct stack_info info; + + if (get_stack_info_noinstr((void *)address, current, &info) && + info.type & STACK_TYPE_GUARD) { + const char *name = stack_type_name(info.type & ~STACK_TYPE_GUARD); + pr_emerg("BUG: %s stack guard hit at %p (stack is %p..%p)\n", + name, (void *)address, info.begin, info.end); + } + } +#endif + pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code); die("double fault", regs, error_code); panic("Machine halted."); @@ -708,7 +721,9 @@ asmlinkage __visible noinstr struct pt_r sp = regs->sp; stack = (unsigned long *)sp; - if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY || + if (!get_stack_info_noinstr(stack, current, &info) || + info.type & STACK_TYPE_GUARD || + info.type == STACK_TYPE_ENTRY || info.type >= STACK_TYPE_EXCEPTION_LAST) sp = __this_cpu_ist_top_va(VC2);