[Crash-utility] Re: [PATCH] x86_64: rhel 9.3: "crash" doesn't handle all IRQ exception properly

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2024/04/16 18:47, Tao Liu wrote:
> Hi Aureau,
> 
> On Tue, Apr 16, 2024 at 5:14 PM Aureau, Georges (Kernel Tools ERT)
> <georges.aureau@xxxxxxx> wrote:
>>
>> Hello Tao Liu,
>>
>> We don't update machdep->machspec->irq_eframe_link as it is now more a reference point for trying various link offsets (+8, -8, +16, -16) around it.
>> This reference point should be viewed as the most likely link offset, and if we fail, we try around it, but without updating this reference point,
>> otherwise we lose our reference point and the offsets we would be trying next would have a different meaning.
>> Different processors may be caught in different IRQ/SYSVEC functions with different framesizes,
>> so there is no longer a unique (same for all IRQ/SYSVEC functions) link offset, so we can't compute one eframe link for all.
>> We are adapting the eframe link offset on the fly based on possible IRQ/SYSVEC function framesizes, ie.
>> most likely -32, then try -40, then -24, then -48, then -16.
>> If we would update as we find a link, let's say -16, then next time around,
>> most likely -16, then try -24, then -8, then -32, then 0, eg. we would lose the -40 and -48.
>> So if we have a most likely eframe link as a reference point, we should not move this point.
>> The comment in x86_64_irq_eframe_link() for the patch I'm suggesting is saying:
>> "keep ms->irq_eframe_link as the most likely value, and try a few sizes around it."
>> See below:
> 
> Thanks for the explanation, it sounds reasonable. OK I have no other
> comments for this patch, so ack.

ok, the patch has got more than one acks and Lianbo also agreed.

The commit message looks long and not for being merged as it is, I've 
tweaked and shortened it a bit and applied.

https://github.com/crash-utility/crash/commit/7d4daf0c035409f677afd87e8d3b3063447dd63e

Thanks!
Kazu
From: "Aureau, Georges (Kernel Tools ERT)" <georges.aureau@xxxxxxx>
Date: Thu, 4 Apr 2024 14:39:06 +0000
Subject: [PATCH] x86_64: Fix "bt" command to handle IRQ exception frames properly

On x86_64, there are cases where crash cannot handle IRQ exception
frames properly.  For example, with RHEL9.3 kernel, "bt" command fails
with with "WARNING possibly bogus exception frame":

  crash> bt -c 30
  PID: 2898241  TASK: ff4cb0ce0da0c680  CPU: 30   COMMAND: "star-ccm+"
   #0 [fffffe4658d88e58] crash_nmi_callback at ffffffffa00675e8
   #1 [fffffe4658d88e68] nmi_handle at ffffffffa002ebab
  ...
  --- <NMI exception stack> ---
  ...
  #13 [ff5eba269937cf90] __do_softirq at ffffffffa0c6c007
  #14 [ff5eba269937cfe0] __irq_exit_rcu at ffffffffa010ef61
  #15 [ff5eba269937cff0] sysvec_apic_timer_interrupt at ffffffffa0c58ca2
  --- <IRQ stack> ---
      RIP: 0000000000000010  RSP: 0000000000000018  RFLAGS: ff5eba26ddc9f7e8
      RAX: 0000000000000a20  RBX: ff5eba26ddc9f940  RCX: 0000000000001000
      RDX: ffffffb559980000  RSI: ff4cb12d67207400  RDI: ffffffffffffffff
      RBP: 0000000000001000   R8: ff5eba26ddc9f940   R9: ff5eba26ddc9f8af
      R10: 0000000000000003  R11: 0000000000000a20  R12: ff5eba26ddc9f8b0
      R13: 000000283c07f000  R14: ff4cb0f5a29a1c00  R15: 0000000000000001
      ORIG_RAX: ffffffffa07c4e60  CS: 0206  SS: 7000001cf0380001
  bt: WARNING: possibly bogus exception frame

Running "crash" with "--machdep irq_eframe_link=0xffffffffffffffe8"
option (i.e. thus irq_eframe_link = -24) works properly:

  PID: 2898241  TASK: ff4cb0ce0da0c680  CPU: 30   COMMAND: "star-ccm+"
   #0 [fffffe4658d88e58] crash_nmi_callback at ffffffffa00675e8
   #1 [fffffe4658d88e68] nmi_handle at ffffffffa002ebab
  ...
  --- <NMI exception stack> ---
  ...
  #13 [ff5eba269937cf90] __do_softirq at ffffffffa0c6c007
  #14 [ff5eba269937cfe0] __irq_exit_rcu at ffffffffa010ef61
  #15 [ff5eba269937cff0] sysvec_apic_timer_interrupt at ffffffffa0c58ca2
  --- <IRQ stack> ---
  #16 [ff5eba26ddc9f738] asm_sysvec_apic_timer_interrupt at ffffffffa0e00e06
      [exception RIP: alloc_pte.constprop.0+32]
      RIP: ffffffffa07c4e60  RSP: ff5eba26ddc9f7e8  RFLAGS: 00000206
      RAX: ff5eba26ddc9f940  RBX: 0000000000001000  RCX: 0000000000000a20
      RDX: 0000000000001000  RSI: ffffffb559980000  RDI: ff4cb12d67207400
      RBP: ff5eba26ddc9f8b0   R8: ff5eba26ddc9f8af   R9: 0000000000000003
      R10: 0000000000000a20  R11: ff5eba26ddc9f940  R12: 000000283c07f000
      R13: ff4cb0f5a29a1c00  R14: 0000000000000001  R15: ff4cb0f5a29a1bf8
      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
  #17 [ff5eba26ddc9f830] iommu_v1_map_pages at ffffffffa07c5648
  #18 [ff5eba26ddc9f8f8] __iommu_map at ffffffffa07d7803
  #19 [ff5eba26ddc9f990] iommu_map_sg at ffffffffa07d7b71
  #20 [ff5eba26ddc9f9f0] iommu_dma_map_sg at ffffffffa07ddcc9
  #21 [ff5eba26ddc9fa90] __dma_map_sg_attrs at ffffffffa01b5205
  ...

Some background:

asm_common_interrupt:
      callq  error_entry
      movq   %rax,%rsp
      movq   %rsp,%rdi
      movq   0x78(%rsp),%rsi
      movq   $-0x1,0x78(%rsp)
      call common_interrupt    # rsp pointing to regs
  
common_interrupt:
      pushq  %r12
      pushq  %rbp
      pushq  %rbx
      [...]
      movq   hardirq_stack_ptr,%r11
      movq   %rsp,(%r11)
      movq   %r11,%rsp
      [...]
      call __common_interrupt  # rip:common_interrupt

So frame_size(rip:common_interrupt) = 32 (3 push + ret).

Hence "machdep->machspec->irq_eframe_link = -32;" (see x86_64_irq_eframe_link_init()).

Now:

asm_sysvec_apic_timer_interrupt:
      pushq  $-0x1
      callq  error_entry
      movq   %rax,%rsp
      movq   %rsp,%rdi
      callq  sysvec_apic_timer_interrupt
  
sysvec_apic_timer_interrupt:
      pushq  %r12
      pushq  %rbp
      [...]
      movq   hardirq_stack_ptr,%r11
      movq   %rsp,(%r11)
      movq   %r11,%rsp
      [...]
      call __sysvec_apic_timer_interrupt  # rip:sysvec_apic_timer_interrupt

Here frame_size(rip:sysvec_apic_timer_interrupt) = 24 (2 push + ret)

We should also notice that:

rip  = *(hardirq_stack_ptr - 8)
rsp  = *(hardirq_stack_ptr)
regs = rsp - frame_size(rip)

But x86_64_get_framesize() does not work with IRQ handlers (returns 0).
So not many options other than hardcoding the most likely value and
looking around it.  Actually x86_64_irq_eframe_link() was trying -32
(default), and then -40, but not -24.

Signed-off-by: Georges Aureau <georges.aureau@xxxxxxx>
--
diff --git a/x86_64.c b/x86_64.c
index aec82b0..90c2a91 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -6623,13 +6623,14 @@ x86_64_irq_eframe_link_init(void)

 /*
  *  Calculate and verify the IRQ exception frame location from the
- *  stack reference at the top of the IRQ stack, possibly adjusting
- *  the ms->irq_eframe_link value.
+ *  stack reference at the top of the IRQ stack, keep ms->irq_eframe_link
+ *  as the most likely value, and try a few sizes around it.
  */
 static ulong
 x86_64_irq_eframe_link(ulong stkref, struct bt_info *bt, FILE *ofp)
 {
        ulong irq_eframe;
+       int i, try[] = { 8, -8, 16, -16 };

        if (x86_64_exception_frame(EFRAME_VERIFY, stkref, 0, bt, ofp))
                return stkref;
@@ -6639,9 +6640,10 @@ x86_64_irq_eframe_link(ulong stkref, struct bt_info *bt, FILE *ofp)
        if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe, 0, bt, ofp))
                return irq_eframe;

-       if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe+8, 0, bt, ofp)) {
-               machdep->machspec->irq_eframe_link -= 8;
-               return (irq_eframe + 8);
+       for (i = 0; i < sizeof(try)/sizeof(int); i++) {
+               if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe+try[i], 0, bt, ofp)) {
+                       return (irq_eframe + try[i]);
+               }
        }

        return irq_eframe;


--
Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

--
Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux