Re: [PATCH] Fix for "bt" command prints a bogus warning

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2023/02/09 21:35, lijiang wrote:> On Tue, Feb 7, 2023 at 6:08 PM lijiang <lijiang@xxxxxxxxxx <mailto:lijiang@xxxxxxxxxx>> wrote:
> 
>         more important thing is how we determine the irq_eframe_link.
> 
>     The following patch can work on upstream kernel vmcore and RHEL9 vmcore.
>     Maybe we can check the symbols asm_common_interrupt and asm_call_on_stack as below:
> 
>     diff --git a/x86_64.c b/x86_64.c
>     index 7a5d6f050c89..62036f71f632 100644
>     --- a/x86_64.c
>     +++ b/x86_64.c
>     @@ -3938,6 +3938,11 @@ in_exception_stack:
>               if (irq_eframe) {
>                       bt->flags |= BT_EXCEPTION_FRAME;
>                       i = (irq_eframe - bt->stackbase)/sizeof(ulong);
>     + if (symbol_exists("asm_common_interrupt")) {
>     + i -= 1;
>     + up = (ulong *)(&bt->stackbuf[i*sizeof(ulong)]);
>     + bt->instptr = *up;
>     + }
>                       x86_64_print_stack_entry(bt, ofp, level, i, bt->instptr);
>                       bt->flags &= ~(ulonglong)BT_EXCEPTION_FRAME;
>                       cs = x86_64_exception_frame(EFRAME_PRINT|EFRAME_CS, 0,
>     @@ -6521,6 +6526,16 @@ x86_64_irq_eframe_link_init(void)
>        else
>        return;
> 
>     + if (symbol_exists("asm_common_interrupt") && !symbol_exists("asm_call_on_stack")) {
>     + machdep->machspec->irq_eframe_link =-32;
>     + return;
>     + }
>     +
>     + if (symbol_exists("asm_common_interrupt") && symbol_exists("asm_call_on_stack")) {
>     + machdep->machspec->irq_eframe_link =-56;
>     + return;
>     + }
>     +
>        if (THIS_KERNEL_VERSION < LINUX(2,6,9))
>        return;
> 
> 
> Do you have any other comments about the above changes? Or still looking for a better solution, any thoughts? Kazu.

Thank you for the information.  I've looked for a way to follow
kernel changes, but it would be hard with the current unwinder.
So for now, let's go with this?

+       if (symbol_exists("asm_common_interrupt")) {
+               if (symbol_exists("asm_call_on_stack"))
+                       machdep->machspec->irq_eframe_link = -64;
+               else
+                       machdep->machspec->irq_eframe_link = -32;
+               return;
+       }

Probably the actual irq_eframe_link for your kernel is -64, I
think it's adjusted in x86_64_irq_eframe_link().  Please check
"help -m".  e.g. a 5.8 vmcore on hand:

crash-dev> bt
...
crash-dev> help -m | grep irq_eframe
           irq_eframe_link: -64


The following is the reason why the irq_eframe_link is -32 on
RHEL9.1 (5.14.0-162.6.1.el9_1.x86_64).

crash-dev> dis asm_common_interrupt
0xffffffff87a00cc0 <asm_common_interrupt>:      clac
0xffffffff87a00cc3 <asm_common_interrupt+3>:    cld
0xffffffff87a00cc4 <asm_common_interrupt+4>:    call   0xffffffff87a012b0 <error_entry> [1]
0xffffffff87a00cc9 <asm_common_interrupt+9>:    mov    %rax,%rsp                        [2]
0xffffffff87a00ccc <asm_common_interrupt+12>:   mov    %rsp,%rdi
0xffffffff87a00ccf <asm_common_interrupt+15>:   mov    0x78(%rsp),%rsi
0xffffffff87a00cd4 <asm_common_interrupt+20>:   movq   $0xffffffffffffffff,0x78(%rsp)
0xffffffff87a00cdd <asm_common_interrupt+29>:   call   0xffffffff878d8370 <common_interrupt> [3]
0xffffffff87a00ce2 <asm_common_interrupt+34>:   jmp    0xffffffff87a013d0 <error_return>

The asm_common_interrupt calls the error_entry first [1], which pushes
registers (pt_regs) to the stack and returns the address:

         leaq    8(%rsp), %rax                   /* return pt_regs pointer */

Then it writes the address in %rax to %rsp [2].  After that, it calls
the common_interrupt [3], so now *(%rsp) = asm_common_interrupt+34 and
*(%rsp+8) = pt_regs.

crash-dev> dis common_interrupt
0xffffffff878d8370 <common_interrupt>:          push   %r13       [4]
0xffffffff878d8372 <common_interrupt+2>:        push   %r12       [4]
0xffffffff878d8374 <common_interrupt+4>:        mov    %rsi,%r12
0xffffffff878d8377 <common_interrupt+7>:        push   %rbp       [4]
0xffffffff878d8378 <common_interrupt+8>:        mov    %rdi,%rbp
0xffffffff878d837b <common_interrupt+11>:       movzbl %r12b,%r12d
0xffffffff878d837f <common_interrupt+15>:       call   0xffffffff878dab00 <irqentry_enter>
0xffffffff878d8384 <common_interrupt+20>:       mov    %eax,%r13d
0xffffffff878d8387 <common_interrupt+23>:       movb   $0x1,%gs:0x787522f3(%rip)        # 0x2a682
0xffffffff878d838f <common_interrupt+31>:       testb  $0x3,0x88(%rbp)
0xffffffff878d8396 <common_interrupt+38>:       jne    0xffffffff878d83a3 <common_interrupt+51>  [5]
0xffffffff878d8398 <common_interrupt+40>:       mov    %gs:0x78740b65(%rip),%al        # 0x18f04
0xffffffff878d839f <common_interrupt+47>:       test   %al,%al
0xffffffff878d83a1 <common_interrupt+49>:       je     0xffffffff878d83c8 <common_interrupt+88>  [5]
0xffffffff878d83a3 <common_interrupt+51>:       call   0xffffffff86efb8a0 <irq_enter_rcu>
0xffffffff878d83a8 <common_interrupt+56>:       mov    %r12d,%esi
0xffffffff878d83ab <common_interrupt+59>:       mov    %rbp,%rdi
0xffffffff878d83ae <common_interrupt+62>:       call   0xffffffff86e26630 <__common_interrupt>
0xffffffff878d83b3 <common_interrupt+67>:       call   0xffffffff86efb920 <irq_exit_rcu>
0xffffffff878d83b8 <common_interrupt+72>:       mov    %r13d,%esi
0xffffffff878d83bb <common_interrupt+75>:       mov    %rbp,%rdi
0xffffffff878d83be <common_interrupt+78>:       pop    %rbp
0xffffffff878d83bf <common_interrupt+79>:       pop    %r12
0xffffffff878d83c1 <common_interrupt+81>:       pop    %r13
0xffffffff878d83c3 <common_interrupt+83>:       jmp    0xffffffff878dab80 <irqentry_exit>
0xffffffff878d83c8 <common_interrupt+88>:       movb   $0x1,%gs:0x78740b34(%rip)        # 0x18f04 [6]
0xffffffff878d83d0 <common_interrupt+96>:       mov    %gs:0x78740b30(%rip),%r11        # 0x18f08 [7]
0xffffffff878d83d8 <common_interrupt+104>:      mov    %rsp,(%r11)                                [7]
0xffffffff878d83db <common_interrupt+107>:      mov    %r11,%rsp                                  [7]
0xffffffff878d83de <common_interrupt+110>:      call   0xffffffff86efb8a0 <irq_enter_rcu>
0xffffffff878d83e3 <common_interrupt+115>:      mov    %rbp,%rdi
0xffffffff878d83e6 <common_interrupt+118>:      mov    %r12d,%esi
0xffffffff878d83e9 <common_interrupt+121>:      call   0xffffffff86e26630 <__common_interrupt>
0xffffffff878d83ee <common_interrupt+126>:      call   0xffffffff86efb920 <irq_exit_rcu>
0xffffffff878d83f3 <common_interrupt+131>:      pop    %rsp
0xffffffff878d83f4 <common_interrupt+132>:      movb   $0x0,%gs:0x78740b08(%rip)        # 0x18f04 [8]
0xffffffff878d83fc <common_interrupt+140>:      mov    %r13d,%esi
0xffffffff878d83ff <common_interrupt+143>:      mov    %rbp,%rdi
0xffffffff878d8402 <common_interrupt+146>:      pop    %rbp
0xffffffff878d8403 <common_interrupt+147>:      pop    %r12
0xffffffff878d8405 <common_interrupt+149>:      pop    %r13
0xffffffff878d8407 <common_interrupt+151>:      jmp    0xffffffff878dab80 <irqentry_exit>

The common_interrupt pushes 3 registers [4], so now *(%rsp+32) = pt_regs.
[5]s look the following condition, the else block goes to [6].  Then,
writes the current %rsp to the address in hardirq_stack_ptr, which
points to the end of the irq stack, and switches %rsp to it [7].

#define call_on_irqstack_cond(func, regs, asm_call, constr, c_args...)  \
{                                                                       \
         /*                                                              \
          * User mode entry and interrupt on the irq stack do not        \
          * switch stacks. If from user mode the task stack is empty.    \
          */                                                             \
         if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) {  \ [5]
                 irq_enter_rcu();                                        \
                 func(c_args);                                           \
                 irq_exit_rcu();                                         \
         } else {                                                        \
                 /*                                                      \
                  * Mark the irq stack inuse _before_ and unmark _after_ \
                  * switching stacks. Interrupts are disabled in both    \
                  * places. Invoke the stack switch macro with the call  \
                  * sequence which matches the above direct invocation.  \
                  */                                                     \
                 __this_cpu_write(hardirq_stack_inuse, true);            \ [6]
                 call_on_irqstack(func, asm_call, constr);               \ [7]
                 __this_cpu_write(hardirq_stack_inuse, false);           \ [8]
         }                                                               \
}

crash-dev> sym -l
...
18f04 (D) hardirq_stack_inuse
18f08 (D) hardirq_stack_ptr
...
crash-dev> p hardirq_stack_ptr:a
per_cpu(hardirq_stack_ptr, 0) = $2 = (void *) 0xffffa5ab40003ff8
per_cpu(hardirq_stack_ptr, 1) = $3 = (void *) 0xffffa5ab400ecff8
per_cpu(hardirq_stack_ptr, 2) = $4 = (void *) 0xffffa5ab40118ff8
per_cpu(hardirq_stack_ptr, 3) = $5 = (void *) 0xffffa5ab40144ff8


On the other hand, the current crash reads the saved %rsp here:

     up = (ulong *)(&bt->stackbuf[stacktop - bt->stackbase]);
     up -= 1;     <<<-- the same as hardirq_stack_ptr, the saved %rsp
     irq_eframe = rsp = bt->stkptr = x86_64_irq_eframe_link(*up, bt, ofp);
     up -= 1;
     bt->instptr = *up;

So the irq_eframe_link should be -32, because *(%rsp+32) is pt_regs.
And the return address from the common_interrupt is *(%rsp+24),
the following code gets it.

     if (symbol_exists("asm_common_interrupt")) {
             i -= 1;
             up = (ulong *)(&bt->stackbuf[i*sizeof(ulong)]);
             bt->instptr = *up;
     }


With the same way, I think the irq_eframe_link for kernel 5.8 to
5.11 will be -64.

Thanks,
Kazu
--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/crash-utility
Contribution Guidelines: https://github.com/crash-utility/crash/wiki




[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux