[Crash-utility] [PATCH] x86_64: rhel 9.3: "crash" doesn't handle all IRQ exception properly.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello,

On x86_64, with rhel 9.3, there are cases where "crash" can't handle IRQ exception frames properly.

For example, "crash" fails with "WARNING: possibly bogus exception frame":

crash> bt -c 30
PID: 2898241  TASK: ff4cb0ce0da0c680  CPU: 30   COMMAND: "star-ccm+"
 #0 [fffffe4658d88e58] crash_nmi_callback at ffffffffa00675e8
 #1 [fffffe4658d88e68] nmi_handle at ffffffffa002ebab
 #2 [fffffe4658d88eb0] default_do_nmi at ffffffffa0c56cd0
 #3 [fffffe4658d88ed0] exc_nmi at ffffffffa0c56ee1
 #4 [fffffe4658d88ef0] end_repeat_nmi at ffffffffa0e01639
    [exception RIP: native_queued_spin_lock_slowpath+636]
    RIP: ffffffffa0c6b80c  RSP: ff5eba269937ce10  RFLAGS: 00000046
    RAX: 0000000000000000  RBX: ff4cb12bcfbb2940  RCX: 00000000007c0000
    RDX: 0000000000000057  RSI: 0000000001600000  RDI: ff4cb12d67205608
    RBP: ff4cb12d67205608   R8: ff90b9c682475940   R9: 0000000000000000
    R10: ff4cb0d70ee4e100  R11: 0000000000000000  R12: 0000000000000000
    R13: 000000000000001e  R14: ff90b9c682474918  R15: ff90b9c682477120
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
--- <NMI exception stack> ---
 #5 [ff5eba269937ce10] native_queued_spin_lock_slowpath at ffffffffa0c6b80c
 #6 [ff5eba269937ce30] _raw_spin_lock_irqsave at ffffffffa0c6ad90
 #7 [ff5eba269937ce40] free_iova at ffffffffa07df716
 #8 [ff5eba269937ce68] fq_ring_free at ffffffffa07dba15
 #9 [ff5eba269937ce98] fq_flush_timeout at ffffffffa07dc8fe
#10 [ff5eba269937ced0] call_timer_fn at ffffffffa01c00a4
#11 [ff5eba269937cef0] __run_timers at ffffffffa01c03ae
#12 [ff5eba269937cf88] run_timer_softirq at ffffffffa01c0476
#13 [ff5eba269937cf90] __do_softirq at ffffffffa0c6c007
#14 [ff5eba269937cfe0] __irq_exit_rcu at ffffffffa010ef61
#15 [ff5eba269937cff0] sysvec_apic_timer_interrupt at ffffffffa0c58ca2
--- <IRQ stack> ---
    RIP: 0000000000000010  RSP: 0000000000000018  RFLAGS: ff5eba26ddc9f7e8
    RAX: 0000000000000a20  RBX: ff5eba26ddc9f940  RCX: 0000000000001000
    RDX: ffffffb559980000  RSI: ff4cb12d67207400  RDI: ffffffffffffffff
    RBP: 0000000000001000   R8: ff5eba26ddc9f940   R9: ff5eba26ddc9f8af
    R10: 0000000000000003  R11: 0000000000000a20  R12: ff5eba26ddc9f8b0
    R13: 000000283c07f000  R14: ff4cb0f5a29a1c00  R15: 0000000000000001
    ORIG_RAX: ffffffffa07c4e60  CS: 0206  SS: 7000001cf0380001
bt: WARNING: possibly bogus exception frame


Running "crash" with "--machdep irq_eframe_link=0xffffffffffffffe8" (ie. thus irq_eframe_link = -24) works properly:

PID: 2898241  TASK: ff4cb0ce0da0c680  CPU: 30   COMMAND: "star-ccm+"
 #0 [fffffe4658d88e58] crash_nmi_callback at ffffffffa00675e8
 #1 [fffffe4658d88e68] nmi_handle at ffffffffa002ebab
 #2 [fffffe4658d88eb0] default_do_nmi at ffffffffa0c56cd0
 #3 [fffffe4658d88ed0] exc_nmi at ffffffffa0c56ee1
 #4 [fffffe4658d88ef0] end_repeat_nmi at ffffffffa0e01639
    [exception RIP: native_queued_spin_lock_slowpath+636]
    RIP: ffffffffa0c6b80c  RSP: ff5eba269937ce10  RFLAGS: 00000046
    RAX: 0000000000000000  RBX: ff4cb12bcfbb2940  RCX: 00000000007c0000
    RDX: 0000000000000057  RSI: 0000000001600000  RDI: ff4cb12d67205608
    RBP: ff4cb12d67205608   R8: ff90b9c682475940   R9: 0000000000000000
    R10: ff4cb0d70ee4e100  R11: 0000000000000000  R12: 0000000000000000
    R13: 000000000000001e  R14: ff90b9c682474918  R15: ff90b9c682477120
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
--- <NMI exception stack> ---
 #5 [ff5eba269937ce10] native_queued_spin_lock_slowpath at ffffffffa0c6b80c
 #6 [ff5eba269937ce30] _raw_spin_lock_irqsave at ffffffffa0c6ad90
 #7 [ff5eba269937ce40] free_iova at ffffffffa07df716
 #8 [ff5eba269937ce68] fq_ring_free at ffffffffa07dba15
 #9 [ff5eba269937ce98] fq_flush_timeout at ffffffffa07dc8fe
#10 [ff5eba269937ced0] call_timer_fn at ffffffffa01c00a4
#11 [ff5eba269937cef0] __run_timers at ffffffffa01c03ae
#12 [ff5eba269937cf88] run_timer_softirq at ffffffffa01c0476
#13 [ff5eba269937cf90] __do_softirq at ffffffffa0c6c007
#14 [ff5eba269937cfe0] __irq_exit_rcu at ffffffffa010ef61
#15 [ff5eba269937cff0] sysvec_apic_timer_interrupt at ffffffffa0c58ca2
--- <IRQ stack> ---
#16 [ff5eba26ddc9f738] asm_sysvec_apic_timer_interrupt at ffffffffa0e00e06
    [exception RIP: alloc_pte.constprop.0+32]
    RIP: ffffffffa07c4e60  RSP: ff5eba26ddc9f7e8  RFLAGS: 00000206
    RAX: ff5eba26ddc9f940  RBX: 0000000000001000  RCX: 0000000000000a20
    RDX: 0000000000001000  RSI: ffffffb559980000  RDI: ff4cb12d67207400
    RBP: ff5eba26ddc9f8b0   R8: ff5eba26ddc9f8af   R9: 0000000000000003
    R10: 0000000000000a20  R11: ff5eba26ddc9f940  R12: 000000283c07f000
    R13: ff4cb0f5a29a1c00  R14: 0000000000000001  R15: ff4cb0f5a29a1bf8
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
#17 [ff5eba26ddc9f830] iommu_v1_map_pages at ffffffffa07c5648
#18 [ff5eba26ddc9f8f8] __iommu_map at ffffffffa07d7803
#19 [ff5eba26ddc9f990] iommu_map_sg at ffffffffa07d7b71
#20 [ff5eba26ddc9f9f0] iommu_dma_map_sg at ffffffffa07ddcc9
#21 [ff5eba26ddc9fa90] __dma_map_sg_attrs at ffffffffa01b5205
#22 [ff5eba26ddc9fa98] dma_map_sgtable at ffffffffa01b526d
#23 [ff5eba26ddc9faa8] ib_umem_get at ffffffffc112f308 [ib_uverbs]
#24 [ff5eba26ddc9fb00] mlx5_ib_reg_user_mr at ffffffffc11eb991 [mlx5_ib]
#25 [ff5eba26ddc9fb40] ib_uverbs_reg_mr at ffffffffc1123bc4 [ib_uverbs]
#26 [ff5eba26ddc9fbb0] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE at ffffffffc112d064 [ib_uverbs]
#27 [ff5eba26ddc9fbe0] ib_uverbs_run_method at ffffffffc112a121 [ib_uverbs]
#28 [ff5eba26ddc9fc30] ib_uverbs_cmd_verbs at ffffffffc112a332 [ib_uverbs]
#29 [ff5eba26ddc9fe68] ib_uverbs_ioctl at ffffffffc112a494 [ib_uverbs]
#30 [ff5eba26ddc9fea8] __x64_sys_ioctl at ffffffffa0438f67
#31 [ff5eba26ddc9fed8] do_syscall_64 at ffffffffa0c55189
#32 [ff5eba26ddc9ff50] entry_SYSCALL_64_after_hwframe at ffffffffa0e000ea
    RIP: 0000146ef3a3ec6b  RSP: 00007ffe3b7dc5b8  RFLAGS: 00000246
    RAX: ffffffffffffffda  RBX: 00007ffe3b7dc6a8  RCX: 0000146ef3a3ec6b
    RDX: 00007ffe3b7dc690  RSI: 00000000c0181b01  RDI: 0000000000000011
    RBP: 00007ffe3b7dc670   R8: 0000146ed9549010   R9: 00007ffe3b7dc7c4
    R10: 0000000000000009  R11: 0000000000000246  R12: 00007ffe3b7dc7c4
    R13: 000000000000000c  R14: 000000000000000c  R15: 0000146ed9549150
    ORIG_RAX: 0000000000000010  CS: 0033  SS: 002b


Some background:

asm_common_interrupt:
      callq  error_entry
      movq   %rax,%rsp
      movq   %rsp,%rdi
      movq   0x78(%rsp),%rsi
      movq   $-0x1,0x78(%rsp)
      call common_interrupt    # rsp pointing to regs
  
common_interrupt
      pushq  %r12
      pushq  %rbp
      pushq  %rbx
      [...]
      movq   hardirq_stack_ptr,%r11
      movq   %rsp,(%r11)
      movq   %r11,%rsp
      [...]
      call __common_interrupt  # rip:common_interrupt

So frame_size(rip:common_interrupt) = 32 (3 push + ret).

Hence "machdep->machspec->irq_eframe_link = -32;"  (see x86_64_irq_eframe_link_init()).

Now:

asm_sysvec_apic_timer_interrupt:
      pushq  $-0x1
      callq  error_entry
      movq   %rax,%rsp
      movq   %rsp,%rdi
      callq  sysvec_apic_timer_interrupt
  
sysvec_apic_timer_interrupt
      pushq  %r12
      pushq  %rbp
      [...]
      movq   hardirq_stack_ptr,%r11
      movq   %rsp,(%r11)
      movq   %r11,%rsp
      [...]
      call __sysvec_apic_timer_interrupt  # rip:sysvec_apic_timer_interrupt

Here frame_size(rip:sysvec_apic_timer_interrupt) = 24 (2 push + ret)

We should also notice that:

rip  = *(hardirq_stack_ptr - 8)
rsp  = *(hardirq_stack_ptr)
regs = rsp - frame_size(rip)

But x86_64_get_framesize() does not work with IRQ handlers (returns 0).
So not many options other than hardcoding the most likely value and looking around it.
Actually x86_64_irq_eframe_link() was trying -32 (default), and then -40,  but not -24 !

Signed-off-by: Georges Aureau <georges.aureau@xxxxxxx>
--
diff --git a/x86_64.c b/x86_64.c
index aec82b0..90c2a91 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -6623,13 +6623,14 @@ x86_64_irq_eframe_link_init(void)

 /*
  *  Calculate and verify the IRQ exception frame location from the
- *  stack reference at the top of the IRQ stack, possibly adjusting
- *  the ms->irq_eframe_link value.
+ *  stack reference at the top of the IRQ stack, keep ms->irq_eframe_link
+ *  as the most likely value, and try a few sizes around it.
  */
 static ulong
 x86_64_irq_eframe_link(ulong stkref, struct bt_info *bt, FILE *ofp)
 {
        ulong irq_eframe;
+       int i, try[] = { 8, -8, 16, -16 };

        if (x86_64_exception_frame(EFRAME_VERIFY, stkref, 0, bt, ofp))
                return stkref;
@@ -6639,9 +6640,10 @@ x86_64_irq_eframe_link(ulong stkref, struct bt_info *bt, FILE *ofp)
        if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe, 0, bt, ofp))
                return irq_eframe;

-       if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe+8, 0, bt, ofp)) {
-               machdep->machspec->irq_eframe_link -= 8;
-               return (irq_eframe + 8);
+       for (i=0; i<sizeof(try)/sizeof(int); i++) {
+               if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe+try[i], 0, bt, ofp)) {
+                       return (irq_eframe + try[i]);
+               }
        }

        return irq_eframe;


--
Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki




[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux