apparent KVM problem with LRET in TianoCore S3 resume trampoline

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I'm working on S3 suspend/resume in OVMF. The problem is that I'm getting an
unexpected guest reboot for code (LRET) that works on physical hardware. I
tried to trace the problem with ftrace, but I didn't get any mentions of
em_ret_far(). (Maybe I was looking in the wrong place.)

Please find below the assembly-language "trampoline" that is invoked (in 64-bit
mode) with the 16-bit real mode resume vector placed in "rcx" (EFIAPI calling
convention). The excerpt is from the edk2 tree,
"MdeModulePkg/Universal/Acpi/BootScriptExecutorDxe/X64/S3Asm.S".

I'm annotating the source code to the right -- please excuse my audacity as I
know you all eat assembly for breakfast, but maybe it will speed up your
processing. (Or perhaps I'll sneakily confuse you with my errors :))

  #
  # AsmTransferControl -- drop out of 64-bit mode and far-jump to the
  # real-mode S3 waking vector.
  #
  # In:   rcx = S3WakingVector    (only the low 20 bits of ecx are used below)
  #       rdx = AcpiLowMemoryBase (not referenced anywhere in this excerpt)
  # Out:  none -- there is no return path in this excerpt; the routine ends
  #       in a far jump whose 4-byte operand (jmp_addr) is patched at run time.
  # Overwrites rax, rbx, rcx, rdx (rdmsr), r8, all data segment registers,
  # CR0, CR4 and the MSR selected by ecx = 0xc0000080.
  #
  ASM_GLOBAL ASM_PFX(AsmTransferControl)                     #
  ASM_PFX(AsmTransferControl):                               #
      # rcx S3WakingVector    :DWORD                         # ecx: ........ ....PPPP QQQQQQQQ RRRRSSSS
      # rdx AcpiLowMemoryBase :DWORD                         #
      #
      # Step 1: build the far-return frame with a single 8-byte push:
      #   low dword  = address of _AsmTransferControl_al_0000 (the 32-bit lea
      #                destination leaves the upper half of rax zero)
      #   high dword = 0x28, the selector to be loaded into CS
      #
      lea   _AsmTransferControl_al_0000(%rip), %eax          # pushing $0x28 for CS
      movq  $0x2800000000, %r8                               # and address of
      orq   %r8, %rax                                        # _AsmTransferControl_al_0000
      pushq %rax                                             # for RIP
      #
      # Step 2: turn the low 20 bits of the waking vector into a 16-bit
      # segment:offset pair (segment = bits 19..4, offset = bits 3..0) and
      # write it into the operand of the far jump at jmp_addr. This is a
      # run-time store into the instruction stream.
      #
      shrd  $20, %ecx, %ebx                                  # ebx: PPPPQQQQ QQQQRRRR SSSS.... ........
      andl  $0x0f, %ecx                                      # ecx: 00000000 00000000 00000000 0000SSSS
      movw  %cx, %bx                                         # ebx: PPPPQQQQ QQQQRRRR 00000000 0000SSSS
      movl  %ebx, jmp_addr(%rip)                             # stores vector as 16-bit segment:offset pair
      # Debug aid only: an infinite self-jump, so lret below is not reached
      # while this loop is present.
  xxxx:                                                      # -- my own loop
      jmp xxxx                                               # -- for debugging
      # Step 3: far return consuming the frame pushed in step 1 (offset in the
      # low dword, selector 0x28 in the high dword).
      lret                                                   # (*) TRIGGERS REBOOT
  _AsmTransferControl_al_0000:                               #
      #
      # Step 4: from this label on, the bytes are intended to be decoded with a
      # 16-bit default operand size, while the assembler is still emitting
      # 64-bit-mode encodings. That is why the first instruction is spelled
      # out with .byte and why 0x66 prefixes precede the instructions that
      # carry 32-bit immediates.
      #
      .byte    0x0b8, 0x30, 0      # mov ax, 30h as selector #
      # Load selector 0x30 into every data segment register.
      movl  %eax, %ds                                        #
      movl  %eax, %es                                        #
      movl  %eax, %fs                                        #
      movl  %eax, %gs                                        #
      movl  %eax, %ss                                        #
      #
      # Step 5: tear down long mode. Order as written: clear CR0 bits 31 and 0,
      # then clear bit 8 of the MSR at 0xc0000080, then clear CR4 bit 5.
      #
      movq  %cr0, %rax                                       #
      movq  %cr4, %rbx                                       #
      .byte    0x66                                          # (**)
      andl  $0x7ffffffe, %eax                                # preps for turning off Paging and Protection Enable
      andb  $0xdf, %bl                                       # preps for turning off PAE
      movq  %rax, %cr0                                       # Paging and PE off
      .byte    0x66                                          # (**)
      movl  $0x0c0000080, %ecx                               #
      rdmsr                                                  #
      # ah bit 0 is bit 8 of eax, i.e. bit 8 of the MSR's low dword.
      andb  $0xfe, %ah                                       #
      wrmsr                                                  # IA-32e Mode Enable off
      movq  %rbx, %cr4                                       # PAE off
      #
      # Step 6: hand-encoded far jump. The opcode byte 0xea is immediately
      # followed by the 4-byte operand at jmp_addr, filled in by step 2.
      #
      .byte    0x0ea              # jmp far jmp_addr         #
  jmp_addr:                                                  #
      .long    0                                             # PPPPQQQQ QQQQRRRR:SSSS

The small loop at xxxx is my debug loop. The "lret" instruction right after
(marked with (*)) triggers a reboot in KVM.

In the loop, this is the register dump (taken with the "info registers" qemu
monitor command):

  RAX=000000289c75be2b RBX=000000009a1d0000 RCX=0000000000000000 RDX=0000000000000000
  RSI=0000000000000000 RDI=0000000000000000 RBP=000000009f7bafd0 RSP=000000009f7bae30
  R8 =0000002800000000 R9 =0000000000000000 R10=00000000008454cd R11=0000000000000000
  R12=0000000000000000 R13=0000000000000000 R14=00000000008454c6 R15=0000000000000000
  RIP=000000009c75be28 RFL=00000046 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
  ES =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
  CS =0018 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA]
  SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
  DS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
  FS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
  GS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
  LDT=0000 0000000000000000 0000ffff 00008200 DPL=0 LDT
  TR =0000 0000000000000000 0000ffff 00008b00 DPL=0 TSS64-busy
  GDT=     0000000000844c80 00000047
  IDT=     000000009c01fd60 0000021f
  CR0=80000033 CR2=0000000000000000 CR3=0000000000080000 CR4=00000660
  DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000
  DR6=00000000ffff0ff0 DR7=0000000000000400
  EFER=0000000000000500
  FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80
  FPR0=0000000000000000 0000 FPR1=0000000000000000 0000
  FPR2=0000000000000000 0000 FPR3=0000000000000000 0000
  FPR4=0000000000000000 0000 FPR5=0000000000000000 0000
  FPR6=0000000000000000 0000 FPR7=0000000000000000 0000
  XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000
  XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000
  XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000
  XMM06=00000000000000000000000000000000 XMM07=00000000000000000000000000000000
  XMM08=00000000000000000000000000000000 XMM09=00000000000000000000000000000000
  XMM10=00000000000000000000000000000000 XMM11=00000000000000000000000000000000
  XMM12=00000000000000000000000000000000 XMM13=00000000000000000000000000000000
  XMM14=00000000000000000000000000000000 XMM15=00000000000000000000000000000000

Right before the function call in the C source, I also read the CS register,
the GDTR, and the GDT entries (please excuse the long lines):

  S3ResumeBootOs: CS=0x0018
  S3ResumeBootOs: Desc.Limit=0x0047
  0x0000: 0x0000000000000000: Base=0x00000000 Limit=0x00000 Type=0x0 (D RO       ) S=0x0 (system   ) DPL=0x0 Present=0 Avail=0 64-bitC=0 D/B=0 LimitGran=0x0 (1B )
  0x0008: 0x0000000000000000: Base=0x00000000 Limit=0x00000 Type=0x0 (D RO       ) S=0x0 (system   ) DPL=0x0 Present=0 Avail=0 64-bitC=0 D/B=0 LimitGran=0x0 (1B )
  0x0010: 0x00CF9B000000FFFF: Base=0x00000000 Limit=0xFFFFF Type=0xB (C ER A     ) S=0x1 (code/data) DPL=0x0 Present=1 Avail=0 64-bitC=0 D/B=1 LimitGran=0x1 (4KB)
  0x0018: 0x00CF93000000FFFF: Base=0x00000000 Limit=0xFFFFF Type=0x3 (D RW A     ) S=0x1 (code/data) DPL=0x0 Present=1 Avail=0 64-bitC=0 D/B=1 LimitGran=0x1 (4KB)
  0x0020: 0x0000000000000000: Base=0x00000000 Limit=0x00000 Type=0x0 (D RO       ) S=0x0 (system   ) DPL=0x0 Present=0 Avail=0 64-bitC=0 D/B=0 LimitGran=0x0 (1B )
  0x0028: 0x008F9B000000FFFF: Base=0x00000000 Limit=0xFFFFF Type=0xB (C ER A     ) S=0x1 (code/data) DPL=0x0 Present=1 Avail=0 64-bitC=0 D/B=0 LimitGran=0x1 (4KB)
  0x0030: 0x008F93000000FFFF: Base=0x00000000 Limit=0xFFFFF Type=0x3 (D RW A     ) S=0x1 (code/data) DPL=0x0 Present=1 Avail=0 64-bitC=0 D/B=0 LimitGran=0x1 (4KB)
  0x0038: 0x00AF9B000000FFFF: Base=0x00000000 Limit=0xFFFFF Type=0xB (C ER A     ) S=0x1 (code/data) DPL=0x0 Present=1 Avail=0 64-bitC=1 D/B=0 LimitGran=0x1 (4KB)
  0x0040: 0x0000000000000000: Base=0x00000000 Limit=0x00000 Type=0x0 (D RO       ) S=0x0 (system   ) DPL=0x0 Present=0 Avail=0 64-bitC=0 D/B=0 LimitGran=0x0 (1B )

The purpose of the LRET would be (by way of selecting CS=0x0028) to select
compat mode code execution (64-bitC=0), and to turn off the D/B bit, i.e. set
default address & operand size to 16 bits. (This is the justification for the
0x66 prefixes I marked with (**) in the assembly.)

Interesting things:
- CS is currently 0x18, which describes a data segment. Strange (but works).
- If I select 0x38 or 0x10 as CS, then the LRET works fine (as in, I reach the
  target label.)
- The offending segment descriptor (at 0x28) differs from these other working
  segment descriptors in the following small details:

  - shared properties:
    Base=0x00000000
    Limit=0xFFFFF
    Type=0xB (C ER A     )
    S=0x1 (code/data)
    DPL=0x0
    Present=1
    Avail=0
    LimitGran=0x1 (4KB)

  - different properties:
    0x0010: 0x00CF9B000000FFFF: 64-bitC=0 D/B=1  works
    0x0028: 0x008F9B000000FFFF: 64-bitC=0 D/B=0  reboots
    0x0038: 0x00AF9B000000FFFF: 64-bitC=1 D/B=0  works

That is:
- If I leave 64-bit mode execution enabled (64-bitC=1, desc 0x38), the lret
  works.
- If I switch to compat mode execution (64-bitC=0, desc 0x10), *and* keep the
  default addr/op size 32 bits, the lret still works.
- If I switch to compat mode execution (64-bitC=0, desc 0x28), but also change
  the default addr/op size to 16-bits, then the lret reboots the guest in KVM
  (but works on physical hardware).

Host:
- Intel(R) Core(TM) i7 CPU M 620 @ 2.67GHz

- KVM parameters (all left at default):
  emulate_invalid_guest_state: Y
  enable_apicv: N
  enable_shadow_vmcs: N
  ept: Y
  eptad: N
  fasteoi: Y
  flexpriority: Y
  nested: N
  ple_gap: 0
  ple_window: 4096
  unrestricted_guest: Y
  vmm_exclusive: Y
  vpid: Y

- KVM: 3.11
- qemu: at 7dc65c02 ("Open 2.0 development tree")
- guest RAM size: 2560 MB (0xA0000000 bytes)

I'm also pasting an objdump disassembly of the routine below (compiled without
my small debug loop). The disassembly is kind of garbled (eg. the movabs and
the 32-bit code), but the hexdump might be helpful.

Please keep me CC'd, I'm not subscribed.

Thank you!
Laszlo

0000000000000000 <AsmTransferControl>:
   0:   8d 05 1f 00 00 00       lea    0x1f(%rip),%eax        # 25 <_AsmTransferControl_al_0000>
   6:   49 b8 00 00 00 00 28    movabs $0x2800000000,%r8
   d:   00 00 00
  10:   4c 09 c0                or     %r8,%rax
  13:   50                      push   %rax
  14:   0f ac cb 14             shrd   $0x14,%ecx,%ebx
  18:   83 e1 0f                and    $0xf,%ecx
  1b:   66 89 cb                mov    %cx,%bx
  1e:   89 1d 31 00 00 00       mov    %ebx,0x31(%rip)        # 55 <jmp_addr>
  24:   cb                      lret

0000000000000025 <_AsmTransferControl_al_0000>:
  25:   b8 30 00 8e d8          mov    $0xd88e0030,%eax
  2a:   8e c0                   mov    %eax,%es
  2c:   8e e0                   mov    %eax,%fs
  2e:   8e e8                   mov    %eax,%gs
  30:   8e d0                   mov    %eax,%ss
  32:   0f 20 c0                mov    %cr0,%rax
  35:   0f 20 e3                mov    %cr4,%rbx
  38:   66 25 fe ff             and    $0xfffe,%ax
  3c:   ff                      (bad)
  3d:   7f 80                   jg     ffffffffffffffbf <L1+0xfffffffffffffeb8>
  3f:   e3 df                   jrcxz  20 <AsmTransferControl+0x20>
  41:   0f 22 c0                mov    %rax,%cr0
  44:   66 b9 80 00             mov    $0x80,%cx
  48:   00 c0                   add    %al,%al
  4a:   0f 32                   rdmsr
  4c:   80 e4 fe                and    $0xfe,%ah
  4f:   0f 30                   wrmsr
  51:   0f 22 e3                mov    %rbx,%cr4
  54:   ea                      (bad)

0000000000000055 <jmp_addr>:
  55:   00 00                   add    %al,(%rax)
        ...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux