On Thu, Sep 16, 2021 at 10:03:19AM +0200, Peter Zijlstra wrote: > Oh, I'm an idiot... yes it tries to read regs the stack, but clearly > that won't work for the guard page. OK, extended it to also cover task and IRQ stacks. get_stack_info() doesn't seem to know about SOFTIRQ stacks on 64bit, might have to look into that next. Andy, what's the story with page_fault_oops(), according to the comment in exc_double_fault() actual stack overflows will always hit #DF. --- diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 3d52b094850a..c4e92462c2b4 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -61,6 +61,9 @@ enum exception_stack_ordering { #define CEA_ESTACK_OFFS(st) \ offsetof(struct cea_exception_stacks, st## _stack) +#define CEA_EGUARD_OFFS(st) \ + offsetof(struct cea_exception_stacks, st## _stack_guard) + #define CEA_ESTACK_PAGES \ (sizeof(struct cea_exception_stacks) / PAGE_SIZE) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f248eb2ac2d4..8ff346579330 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -14,13 +14,14 @@ #include <asm/switch_to.h> enum stack_type { - STACK_TYPE_UNKNOWN, + STACK_TYPE_UNKNOWN = 0, STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, STACK_TYPE_ENTRY, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, + STACK_TYPE_GUARD = 0x80, }; struct stack_info { @@ -31,6 +32,15 @@ struct stack_info { bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info); +static __always_inline bool in_stack_guard(void *addr, void *begin, void *end) +{ +#ifdef CONFIG_VMAP_STACK + if (addr > (begin - PAGE_SIZE)) + return true; +#endif + return false; +} + bool in_entry_stack(unsigned long *stack, struct stack_info *info); int get_stack_info(unsigned long *stack, struct task_struct *task, diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ea4fe192189d..91b406fe2a39 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -32,12 +32,19 @@ static struct pt_regs exec_summary_regs; bool noinstr in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info) { - unsigned long *begin = task_stack_page(task); - unsigned long *end = task_stack_page(task) + THREAD_SIZE; - - if (stack < begin || stack >= end) + void *begin = task_stack_page(task); + void *end = begin + THREAD_SIZE; + int type = STACK_TYPE_TASK; + + if ((void *)stack < begin || (void *)stack >= end) { + if (in_stack_guard(stack, begin, end)) { + type |= STACK_TYPE_GUARD; + goto fill_info; + } return false; + } +fill_info: info->type = STACK_TYPE_TASK; info->begin = begin; info->end = end; @@ -50,14 +57,20 @@ bool noinstr in_task_stack(unsigned long *stack, struct task_struct *task, bool noinstr in_entry_stack(unsigned long *stack, struct stack_info *info) { struct entry_stack *ss = cpu_entry_stack(smp_processor_id()); - + int type = STACK_TYPE_ENTRY; void *begin = ss; void *end = ss + 1; - if ((void *)stack < begin || (void *)stack >= end) + if ((void *)stack < begin || (void *)stack >= end) { + if (in_stack_guard(stack, begin, end)) { + type |= STACK_TYPE_GUARD; + goto fill_info; + } return false; + } - info->type = STACK_TYPE_ENTRY; +fill_info: + info->type = type; info->begin = begin; info->end = end; info->next_sp = NULL; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5601b95944fa..3634bdf9ab36 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -32,9 +32,15 @@ const char *stack_type_name(enum stack_type type) { BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); + if (type == STACK_TYPE_TASK) + return "TASK"; + if (type == STACK_TYPE_IRQ) return "IRQ"; + if (type == STACK_TYPE_SOFTIRQ) + return "SOFTIRQ"; + if (type == STACK_TYPE_ENTRY) { /* * On 64-bit, we have a generic entry stack that we @@ -63,6 +69,11 @@ struct estack_pages { }; #define EPAGERANGE(st) \ + [PFN_DOWN(CEA_EGUARD_OFFS(st))] = { \ + .offs = CEA_EGUARD_OFFS(st), \ + .size = PAGE_SIZE, \ + .type = STACK_TYPE_GUARD + \ + STACK_TYPE_EXCEPTION + ESTACK_ ##st, }, \ [PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \ PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \ .offs = CEA_ESTACK_OFFS(st), \ @@ -111,7 +122,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac k = (stk - begin) >> PAGE_SHIFT; /* Lookup the page descriptor */ ep = &estack_pages[k]; - /* Guard page? */ + /* unknown entry */ if (!ep->size) return false; @@ -122,7 +133,12 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac info->type = ep->type; info->begin = (unsigned long *)begin; info->end = (unsigned long *)end; - info->next_sp = (unsigned long *)regs->sp; + info->next_sp = NULL; + + /* Can't read regs from a guard page. */ + if (!(ep->type & STACK_TYPE_GUARD)) + info->next_sp = (unsigned long *)regs->sp; + return true; } @@ -130,6 +146,7 @@ static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info { unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); unsigned long *begin; + int type = STACK_TYPE_IRQ; /* * @end points directly to the top most stack entry to avoid a -8 @@ -144,19 +161,27 @@ static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info * final operation is 'popq %rsp' which means after that RSP points * to the original stack and not to @end. */ - if (stack < begin || stack >= end) + if (stack < begin || stack >= end) { + if (in_stack_guard(stack, begin, end)) { + type |= STACK_TYPE_GUARD; + goto fill_info; + } return false; + } - info->type = STACK_TYPE_IRQ; +fill_info: + info->type = type; info->begin = begin; info->end = end; + info->next_sp = NULL; /* * The next stack pointer is stored at the top of the irq stack * before switching to the irq stack. Actual stack entries are all * below that. */ - info->next_sp = (unsigned long *)*(end - 1); + if (!(type & STACK_TYPE_GUARD)) + info->next_sp = (unsigned long *)*(end - 1); return true; } @@ -193,6 +218,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (!get_stack_info_noinstr(stack, task, info)) goto unknown; + if (info->type & STACK_TYPE_GUARD) + goto unknown; + /* * Make sure we don't iterate through any given stack more than once. * If it comes up a second time then there's something wrong going on: diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a58800973aed..80f6d8d735eb 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -353,6 +353,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault) #ifdef CONFIG_VMAP_STACK unsigned long address = read_cr2(); + struct stack_info info; #endif #ifdef CONFIG_X86_ESPFIX64 @@ -455,9 +456,11 @@ DEFINE_IDTENTRY_DF(exc_double_fault) * stack even if the actual trigger for the double fault was * something else. */ - if ((unsigned long)task_stack_page(tsk) - 1 - address < PAGE_SIZE) { - handle_stack_overflow("kernel stack overflow (double-fault)", - regs, address); + if (get_stack_info_noinstr((void *)address, current, &info) && + info.type & STACK_TYPE_GUARD) { + const char *name = stack_type_name(info.type & ~STACK_TYPE_GUARD); + pr_emerg("BUG: %s stack guard hit at %p (stack is %p..%p)\n", + name, (void *)address, info.begin, info.end); } #endif @@ -708,7 +711,9 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r sp = regs->sp; stack = (unsigned long *)sp; - if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY || + if (!get_stack_info_noinstr(stack, current, &info) || + info.type & STACK_TYPE_GUARD || + info.type == STACK_TYPE_ENTRY || info.type >= STACK_TYPE_EXCEPTION_LAST) sp = __this_cpu_ist_top_va(VC2);