On 2022/12/2 15:44, HAGIO KAZUHITO(萩尾 一仁) wrote:
On 2022/12/01 16:01, Ding Hui wrote:We met "bt" cmd on KASAN kernel vmcore display truncated backtraces like this: crash> bt PID: 4131 TASK: ffff8001521df000 CPU: 3 COMMAND: "bash" #0 [ffff2000224b0cb0] machine_kexec_prepare at ffff2000200bff4c After digging the root cause, it turns out that arm64_in_kdump_text() found wrong bt->bptr at "machine_kexec" branch. Disassemble machine_kexec() of KASAN vmlinux (gcc 7.3.0): crash> dis -x machine_kexec 0xffff2000200bff50 <machine_kexec>: stp x29, x30, [sp,#-208]! 0xffff2000200bff54 <machine_kexec+0x4>: mov x29, sp 0xffff2000200bff58 <machine_kexec+0x8>: stp x19, x20, [sp,#16] 0xffff2000200bff5c <machine_kexec+0xc>: str x24, [sp,#56] 0xffff2000200bff60 <machine_kexec+0x10>: str x26, [sp,#72] 0xffff2000200bff64 <machine_kexec+0x14>: mov x2, #0x8ab3 0xffff2000200bff68 <machine_kexec+0x18>: add x1, x29, #0x70 0xffff2000200bff6c <machine_kexec+0x1c>: lsr x1, x1, #3 0xffff2000200bff70 <machine_kexec+0x20>: movk x2, #0x41b5, lsl #16 0xffff2000200bff74 <machine_kexec+0x24>: mov x19, #0x200000000000 0xffff2000200bff78 <machine_kexec+0x28>: adrp x3, 0xffff2000224b0000 0xffff2000200bff7c <machine_kexec+0x2c>: movk x19, #0xdfff, lsl #48 0xffff2000200bff80 <machine_kexec+0x30>: add x3, x3, #0xcb0 0xffff2000200bff84 <machine_kexec+0x34>: add x4, x1, x19 0xffff2000200bff88 <machine_kexec+0x38>: stp x2, x3, [x29,#112] 0xffff2000200bff8c <machine_kexec+0x3c>: adrp x2, 0xffff2000200bf000 <swsusp_arch_resume+0x1e8> 0xffff2000200bff90 <machine_kexec+0x40>: add x2, x2, #0xf50 0xffff2000200bff94 <machine_kexec+0x44>: str x2, [x29,#128] 0xffff2000200bff98 <machine_kexec+0x48>: mov w2, #0xf1f1f1f1 0xffff2000200bff9c <machine_kexec+0x4c>: str w2, [x1,x19] 0xffff2000200bffa0 <machine_kexec+0x50>: mov w2, #0xf200 0xffff2000200bffa4 <machine_kexec+0x54>: mov w1, #0xf3f3f3f3 0xffff2000200bffa8 <machine_kexec+0x58>: movk w2, #0xf2f2, lsl #16 0xffff2000200bffac <machine_kexec+0x5c>: stp w2, w1, [x4,#4] We notice that: 1. machine_kexec() start address is 0xffff2000200bff50 2. the instruction at machine_kexec+0x44 store the same value 0xffff2000200bff50 (comes from 0xffff2000200bf000 + 0xf50) into stack postion [x29,#128]. When arm64_in_kdump_text() search LR from stack, it met 0xffff2000200bff50 firstly, so got wrong bt->bptr. We know that the real LR is always great than the start addressSeems true. One question, do you see which kernel code stores that value?
Actually, there is no C code stores that value. The source code like this: void machine_kexec(struct kimage *kimage) { phys_addr_t reboot_code_buffer_phys; void *reboot_code_buffer; bool in_kexec_crash = (kimage == kexec_crash_image); bool stuck_cpus = cpus_are_stuck_in_kernel(); BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1))); WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), "Some CPUs may be stale, kdump will be unreliable.\n"); ... The point is CONFIG_KASAN=yI compared the gcc args when compiling machine_kexec.o between kasan eanble [1] and kasan enable but set KASAN_SANITIZE_machine_kexec.o := n [2], the difference is:
[1]: -fsanitize=kernel-address -fasan-shadow-offset=0xdfff200000000000 --param asan-globals=1 --param asan-instrumentation-with-call-threshold=10000 --param asan-stack=1
[2]: -fno-builtinIf I remove `--param asan-stack=1` but keep other asan args to compile machine_kexec.o, those assembly statement disappear.
-- Thanks, - Ding Hui
Thanks, Kazuof a function, so let's fix it by change the search conditon to (*ptr > xxx_start) && (*ptr < xxx_end). Signed-off-by: Ding Hui <dinghui@xxxxxxxxxxxxxx> --- arm64.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arm64.c b/arm64.c index c3e26a3..7e8a7db 100644 --- a/arm64.c +++ b/arm64.c @@ -3479,7 +3479,7 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame) ms = machdep->machspec; for (ptr = start - 8; ptr >= base; ptr--) { if (bt->flags & BT_OPT_BACK_TRACE) { - if ((*ptr >= ms->crash_kexec_start) && + if ((*ptr > ms->crash_kexec_start) && (*ptr < ms->crash_kexec_end) && INSTACK(*(ptr - 1), bt)) { bt->bptr = ((ulong)(ptr - 1) - (ulong)base) @@ -3488,7 +3488,7 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame) fprintf(fp, "%lx: %lx (crash_kexec)\n", bt->bptr, *ptr); return TRUE; } - if ((*ptr >= ms->crash_save_cpu_start) && + if ((*ptr > ms->crash_save_cpu_start) && (*ptr < ms->crash_save_cpu_end) && INSTACK(*(ptr - 1), bt)) { bt->bptr = ((ulong)(ptr - 1) - (ulong)base) @@ -3498,14 +3498,14 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame) return TRUE; } } else { - if ((*ptr >= ms->machine_kexec_start) && (*ptr < ms->machine_kexec_end)) { + if ((*ptr > ms->machine_kexec_start) && (*ptr < ms->machine_kexec_end)) { bt->bptr = ((ulong)ptr - (ulong)base) + task_to_stackbase(bt->tc->task); if (CRASHDEBUG(1)) fprintf(fp, "%lx: %lx (machine_kexec)\n", bt->bptr, *ptr); return TRUE; } - if ((*ptr >= ms->crash_kexec_start) && (*ptr < ms->crash_kexec_end)) { + if ((*ptr > ms->crash_kexec_start) && (*ptr < ms->crash_kexec_end)) { /* * Stash the first crash_kexec frame in case the machine_kexec * frame is not found. @@ -3519,7 +3519,7 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame) } continue; } - if ((*ptr >= ms->crash_save_cpu_start) && (*ptr < ms->crash_save_cpu_end)) { + if ((*ptr > ms->crash_save_cpu_start) && (*ptr < ms->crash_save_cpu_end)) { bt->bptr = ((ulong)ptr - (ulong)base) + task_to_stackbase(bt->tc->task); if (CRASHDEBUG(1)) @@ -3566,7 +3566,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt)for (ptr = start - 8; ptr >= base; ptr--) {if (bt->flags & BT_OPT_BACK_TRACE) { - if ((*ptr >= ms->crash_kexec_start) && + if ((*ptr > ms->crash_kexec_start) && (*ptr < ms->crash_kexec_end) && INSTACK(*(ptr - 1), bt)) { bt->bptr = ((ulong)(ptr - 1) - (ulong)base) + stackbase; @@ -3576,7 +3576,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt) FREEBUF(stackbuf); return TRUE; } - if ((*ptr >= ms->crash_save_cpu_start) && + if ((*ptr > ms->crash_save_cpu_start) && (*ptr < ms->crash_save_cpu_end) && INSTACK(*(ptr - 1), bt)) { bt->bptr = ((ulong)(ptr - 1) - (ulong)base) + stackbase; @@ -3587,7 +3587,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt) return TRUE; } } else { - if ((*ptr >= ms->crash_kexec_start) && (*ptr < ms->crash_kexec_end)) { + if ((*ptr > ms->crash_kexec_start) && (*ptr < ms->crash_kexec_end)) { bt->bptr = ((ulong)ptr - (ulong)base) + stackbase; if (CRASHDEBUG(1)) fprintf(fp, "%lx: %lx (crash_kexec on IRQ stack)\n", @@ -3595,7 +3595,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt) FREEBUF(stackbuf); return TRUE; } - if ((*ptr >= ms->crash_save_cpu_start) && (*ptr < ms->crash_save_cpu_end)) { + if ((*ptr > ms->crash_save_cpu_start) && (*ptr < ms->crash_save_cpu_end)) { bt->bptr = ((ulong)ptr - (ulong)base) + stackbase; if (CRASHDEBUG(1)) fprintf(fp, "%lx: %lx (crash_save_cpu on IRQ stack)\n",
-- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/crash-utility Contribution Guidelines: https://github.com/crash-utility/crash/wiki