On 5/15/24 10:06, Liang Chen wrote:
In a nested VM environment, a vCPU can run either an L1 or L2 VM. If the
L0 VMM tries to configure L1 VM registers via the KVM_SET_REGS ioctl while
the vCPU is running an L2 VM, it may inadvertently modify the L2 VM's
registers, corrupting the L2 VM. To avoid this error, registers should be
treated as read-only when the vCPU is actively running an L2 VM.
No, this is intentional. The L0 hypervisor has full control on the CPU
registers, no matter if the VM is in guest mode or not.
Looking at the syzkaller report, the first thing to do is to remove the
threading, because vcpu ioctls are anyway serialized by the vcpu mutex.
Let's assume that the first 7 operations, i.e. all except KVM_RUN and
KVM_SET_REGS, execute in sequence. There is possible interleaving of
the two syz_kvm_setup_cpu$x86 calls, but because only the second sets
up nested virtualization, we can hope that we're lucky.[1]
To do so, change the main to something like
int main(int argc, char **argv)
{
int i;
syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
/*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
/*offset=*/0ul);
syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul,
/*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
/*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
/*offset=*/0ul);
syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
/*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
/*offset=*/0ul);
// r0 = openat$kvm(0xffffffffffffff9c, &(0x7f0000000440), 0x0, 0x0)
execute_call(0);
// r1 = ioctl$KVM_CREATE_VM(r0, 0xae01, 0x0)
execute_call(1);
// r2 = ioctl$KVM_CREATE_VCPU(r1, 0xae41, 0x0)
execute_call(2);
// mmap(&(0x7f0000000000/0x3000)=nil, 0x3000, 0x2, 0x13, r2, 0x0)
execute_call(3);
// syz_kvm_setup_cpu$x86(r1, 0xffffffffffffffff, &(0x7f0000fe7000/0x18000)=nil, &(0x7f0000000200)=[@text16={0x10, 0x0}], 0x1, 0x0, 0x0, 0x0)
execute_call(4);
// ioctl$KVM_REGISTER_COALESCED_MMIO(0xffffffffffffffff, 0x4010ae67, &(0x7f0000000000)={0x2})
execute_call(5);
// syz_kvm_setup_cpu$x86(0xffffffffffffffff, r2, &(0x7f0000fe7000/0x18000)=nil, &(0x7f0000000340)=[@text64={0x40, 0x0}], 0x1, 0x50, 0x0, 0x0)
execute_call(6);
// ioctl$KVM_RUN(r2, 0xae80, 0x0) (rerun: 64)
for (i = 0; i < atoi(argv[1]); i++)
execute_call(7);
// ioctl$KVM_SET_REGS(r2, 0x4090ae82, &(0x7f00000000c0)={[0x7], 0x0, 0x60000})
// interleaved with KVM_RUN
execute_call(8);
// one more KVM_RUN to show the bug
execute_call(7);
return 0;
}
This reproduces the fact that KVM_SET_REGS can sneak in between any
two KVM_RUN. You'll see that changing the argument may cause an entry with
invalid guest state (argument is 0 or >= 3 - that's ok) or the WARN
(argument == 1).
The attached cleaned up reproducer shows that the problem is simply that
EFLAGS.VM is set in 64-bit mode. To fix it, it should be enough to do
a nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0); just like
a few lines below.
Paolo
[1] in fact the first syz_kvm_setup_cpu$v86 passes vcpufd==-1 and
the second passes vmfd==-1. Each of them does only half of the work.
The ioctl$KVM_REGISTER_COALESCED_MMIO is also mostly dummy because it
passes -1 as the file descriptor, but it happens to set
vcpu->run->request_interrupt_window! Which is important later.
Signed-off-by: Liang Chen <liangchen.linux@xxxxxxxxx>
Reported-by: syzbot+988d9efcdf137bc05f66@xxxxxxxxxxxxxxxxxxxxxxxxx
Closes: https://lore.kernel.org/all/0000000000007a9acb06151e1670@xxxxxxxxxx
---
arch/x86/kvm/x86.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91478b769af0..30f63a7dd120 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11527,6 +11527,12 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ /*
+ * Prevent L0 VMM from inadvertently modifying L2 VM registers directly.
+ */
+ if (is_guest_mode(vcpu))
+ return -EACCES;
+
vcpu_load(vcpu);
__set_regs(vcpu, regs);
vcpu_put(vcpu);
// https://syzkaller.appspot.com/bug?id=6c089cbea6bd9a7db9b4c6da59c36f6648be2f6d
// autogenerated by syzkaller (https://github.com/google/syzkaller)
#define _GNU_SOURCE
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/kvm.h>
#define ADDR_TEXT 0x0000
#define ADDR_GDT 0x1000
#define ADDR_PML4 0x2000
#define ADDR_PDP 0x3000
#define ADDR_PD 0x4000
#define ADDR_STACK0 0x0f80
#define ADDR_VAR_HLT 0x2800
#define ADDR_VAR_SYSRET 0x2808
#define ADDR_VAR_SYSEXIT 0x2810
#define ADDR_VAR_TSS64 0x3a00
#define ADDR_VAR_TSS32 0x4800
#define ADDR_VAR_VMXON_PTR 0x5f00
#define ADDR_VAR_VMCS_PTR 0x5f08
#define ADDR_VAR_VMEXIT_PTR 0x5f10
#define ADDR_VAR_VMWRITE_FLD 0x5f18
#define ADDR_VAR_VMWRITE_VAL 0x5f20
#define ADDR_VAR_VMXON 0x6000
#define ADDR_VAR_VMCS 0x7000
#define ADDR_VAR_VMEXIT_CODE 0x9000
#define ADDR_VAR_USER_CODE 0x9100
#define SEL_CS32 (6 << 3)
#define SEL_DS32 (7 << 3)
#define SEL_CS64 (10 << 3)
#define SEL_DS64 (11 << 3)
#define SEL_TSS32 (23 << 3)
#define SEL_TSS64 (27 << 3)
#define SEL_TSS64_HI (28 << 3)
const char kvm_asm64_init_vm[] =
"\x0f\x20\xc0\x0d\x00\x00\x00\x80\x0f\x22\xc0\xea\x12\x00\x00\x00\x50\x00"
"\x48\xc7\xc0\xd8\x00\x00\x00\x0f\x00\xd8\x48\xc7\xc1\x3a\x00\x00\x00\x0f"
"\x32\x48\x83\xc8\x05\x0f\x30\x0f\x20\xe0\x48\x0d\x00\x20\x00\x00\x0f\x22"
"\xe0\x48\xc7\xc1\x80\x04\x00\x00\x0f\x32\x48\xc7\xc2\x00\x60\x00\x00\x89"
"\x02\x48\xc7\xc2\x00\x70\x00\x00\x89\x02\x48\xc7\xc0\x00\x5f\x00\x00\xf3"
"\x0f\xc7\x30\x48\xc7\xc0\x08\x5f\x00\x00\x66\x0f\xc7\x30\x0f\xc7\x30\x48"
"\xc7\xc1\x81\x04\x00\x00\x0f\x32\x48\x83\xc8\x00\x48\x21\xd0\x48\xc7\xc2"
"\x00\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x82\x04\x00\x00\x0f\x32\x48\x83"
"\xc8\x00\x48\x21\xd0\x48\xc7\xc2\x02\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
"\x1e\x40\x00\x00\x48\xc7\xc0\x81\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x83"
"\x04\x00\x00\x0f\x32\x48\x0d\xff\x6f\x03\x00\x48\x21\xd0\x48\xc7\xc2\x0c"
"\x40\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x84\x04\x00\x00\x0f\x32\x48\x0d\xff"
"\x17\x00\x00\x48\x21\xd0\x48\xc7\xc2\x12\x40\x00\x00\x0f\x79\xd0\x48\xc7"
"\xc2\x04\x2c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
"\x00\x28\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x02"
"\x0c\x00\x00\x48\xc7\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc0\x58\x00"
"\x00\x00\x48\xc7\xc2\x00\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x0c\x00"
"\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x08"
"\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x0c\x00\x00\x0f\x79\xd0\x48\xc7"
"\xc0\xd8\x00\x00\x00\x48\xc7\xc2\x0c\x0c\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
"\x02\x2c\x00\x00\x48\xc7\xc0\x00\x05\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00"
"\x4c\x00\x00\x48\xc7\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x6c"
"\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x12\x6c\x00"
"\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x0f\x20\xc0\x48\xc7\xc2\x00"
"\x6c\x00\x00\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xd8\x48\xc7\xc2\x02\x6c\x00"
"\x00\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xe0\x48\xc7\xc2\x04\x6c\x00\x00\x48"
"\x89\xc0\x0f\x79\xd0\x48\xc7\xc2\x06\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00"
"\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00"
"\x0f\x79\xd0\x48\xc7\xc2\x0a\x6c\x00\x00\x48\xc7\xc0\x00\x3a\x00\x00\x0f"
"\x79\xd0\x48\xc7\xc2\x0c\x6c\x00\x00\x48\xc7\xc0\x00\x10\x00\x00\x0f\x79"
"\xd0\x48\xc7\xc2\x0e\x6c\x00\x00\x48\xc7\xc0\x00\x38\x00\x00\x0f\x79\xd0"
"\x48\xc7\xc2\x14\x6c\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48"
"\xc7\xc2\x16\x6c\x00\x00\x48\x8b\x04\x25\x10\x5f\x00\x00\x0f\x79\xd0\x48"
"\xc7\xc2\x00\x00\x00\x00\x48\xc7\xc0\x01\x00\x00\x00\x0f\x79\xd0\x48\xc7"
"\xc2\x02\x00\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
"\x00\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02"
"\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x20"
"\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x20\x00"
"\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc1\x77\x02\x00\x00"
"\x0f\x32\x48\xc1\xe2\x20\x48\x09\xd0\x48\xc7\xc2\x00\x2c\x00\x00\x48\x89"
"\xc0\x0f\x79\xd0\x48\xc7\xc2\x04\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00"
"\x0f\x79\xd0\x48\xc7\xc2\x0a\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f"
"\x79\xd0\x48\xc7\xc2\x0e\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79"
"\xd0\x48\xc7\xc2\x10\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0"
"\x48\xc7\xc2\x16\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48"
"\xc7\xc2\x14\x40\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7"
"\xc2\x00\x60\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2"
"\x02\x60\x00\x00\x48\xc7\xc0\xff\xff\xff\xff\x0f\x79\xd0\x48\xc7\xc2\x1c"
"\x20\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x1e\x20"
"\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20\x20\x00"
"\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x22\x20\x00\x00"
"\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x00\x08\x00\x00\x48"
"\xc7\xc0\x58\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x02\x08\x00\x00\x48\xc7"
"\xc0\x50\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x04\x08\x00\x00\x48\xc7\xc0"
"\x58\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x08\x00\x00\x48\xc7\xc0\x58"
"\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x08\x00\x00\x48\xc7\xc0\x58\x00"
"\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x08\x00\x00\x48\xc7\xc0\x58\x00\x00"
"\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x08\x00\x00\x48\xc7\xc0\x00\x00\x00\x00"
"\x0f\x79\xd0\x48\xc7\xc2\x0e\x08\x00\x00\x48\xc7\xc0\xd8\x00\x00\x00\x0f"
"\x79\xd0\x48\xc7\xc2\x12\x68\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79"
"\xd0\x48\xc7\xc2\x14\x68\x00\x00\x48\xc7\xc0\x00\x3a\x00\x00\x0f\x79\xd0"
"\x48\xc7\xc2\x16\x68\x00\x00\x48\xc7\xc0\x00\x10\x00\x00\x0f\x79\xd0\x48"
"\xc7\xc2\x18\x68\x00\x00\x48\xc7\xc0\x00\x38\x00\x00\x0f\x79\xd0\x48\xc7"
"\xc2\x00\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2"
"\x02\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x04"
"\x48\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x48"
"\x00\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x08\x48\x00"
"\x00\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x48\x00\x00"
"\x48\xc7\xc0\xff\xff\x0f\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x48\x00\x00\x48"
"\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0e\x48\x00\x00\x48\xc7"
"\xc0\xff\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x48\x00\x00\x48\xc7\xc0"
"\xff\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x12\x48\x00\x00\x48\xc7\xc0\xff"
"\x1f\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x14\x48\x00\x00\x48\xc7\xc0\x93\x40"
"\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x16\x48\x00\x00\x48\xc7\xc0\x9b\x20\x00"
"\x00\x0f\x79\xd0\x48\xc7\xc2\x18\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00"
"\x0f\x79\xd0\x48\xc7\xc2\x1a\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f"
"\x79\xd0\x48\xc7\xc2\x1c\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79"
"\xd0\x48\xc7\xc2\x1e\x48\x00\x00\x48\xc7\xc0\x93\x40\x00\x00\x0f\x79\xd0"
"\x48\xc7\xc2\x20\x48\x00\x00\x48\xc7\xc0\x82\x00\x00\x00\x0f\x79\xd0\x48"
"\xc7\xc2\x22\x48\x00\x00\x48\xc7\xc0\x8b\x00\x00\x00\x0f\x79\xd0\x48\xc7"
"\xc2\x1c\x68\x00\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2"
"\x1e\x68\x00\x00\x48\xc7\xc0\x00\x91\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x20"
"\x68\x00\x00\x48\xc7\xc0\x02\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x06\x28"
"\x00\x00\x48\xc7\xc0\x00\x05\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0a\x28\x00"
"\x00\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0c\x28\x00\x00"
"\x48\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x0e\x28\x00\x00\x48"
"\xc7\xc0\x00\x00\x00\x00\x0f\x79\xd0\x48\xc7\xc2\x10\x28\x00\x00\x48\xc7"
"\xc0\x00\x00\x00\x00\x0f\x79\xd0\x0f\x20\xc0\x48\xc7\xc2\x00\x68\x00\x00"
"\x48\x89\xc0\x0f\x79\xd0\x0f\x20\xd8\x48\xc7\xc2\x02\x68\x00\x00\x48\x89"
"\xc0\x0f\x79\xd0\x0f\x20\xe0\x48\xc7\xc2\x04\x68\x00\x00\x48\x89\xc0\x0f"
"\x79\xd0\x48\xc7\xc0\x18\x5f\x00\x00\x48\x8b\x10\x48\xc7\xc0\x20\x5f\x00"
"\x00\x48\x8b\x08\x48\x31\xc0\x0f\x78\xd0\x48\x31\xc8\x0f\x79\xd0\x0f\x01"
"\xc2\x48\xc7\xc2\x00\x44\x00\x00\x0f\x78\xd0\xf4";
const char kvm_asm64_vm_exit[] =
"\x48\xc7\xc3\x00\x44\x00\x00\x0f\x78\xda\x48\xc7\xc3\x02\x44\x00\x00\x0f"
"\x78\xd9\x48\xc7\xc0\x00\x64\x00\x00\x0f\x78\xc0\x48\xc7\xc3\x1e\x68\x00"
"\x00\x0f\x78\xdb\xf4";
#define CR0_PE 1
#define CR0_NE (1 << 5)
#define CR4_PAE (1 << 5)
#define EFER_SCE 1
#define EFER_LME (1 << 8)
#define PDE64_PRESENT 1
#define PDE64_RW (1 << 1)
#define PDE64_USER (1 << 2)
#define PDE64_PS (1 << 7)
struct tss32 {
uint16_t prev, prevh;
uint32_t sp0;
uint16_t ss0, ss0h;
uint32_t sp1;
uint16_t ss1, ss1h;
uint32_t sp2;
uint16_t ss2, ss2h;
uint32_t cr3;
uint32_t ip;
uint32_t flags;
uint32_t ax;
uint32_t cx;
uint32_t dx;
uint32_t bx;
uint32_t sp;
uint32_t bp;
uint32_t si;
uint32_t di;
uint16_t es, esh;
uint16_t cs, csh;
uint16_t ss, ssh;
uint16_t ds, dsh;
uint16_t fs, fsh;
uint16_t gs, gsh;
uint16_t ldt, ldth;
uint16_t trace;
uint16_t io_bitmap;
} __attribute__((packed));
struct tss64 {
uint32_t reserved0;
uint64_t rsp[3];
uint64_t reserved1;
uint64_t ist[7];
uint64_t reserved2;
uint32_t reserved3;
uint32_t io_bitmap;
} __attribute__((packed));
static void fill_segment_descriptor(uint64_t* dt,
struct kvm_segment* seg)
{
uint16_t index = seg->selector >> 3;
uint64_t limit = seg->g ? seg->limit >> 12 : seg->limit;
uint64_t sd = (limit & 0xffff) | (seg->base & 0xffffff) << 16 |
(uint64_t)seg->type << 40 | (uint64_t)seg->s << 44 |
(uint64_t)seg->dpl << 45 | (uint64_t)seg->present << 47 |
(limit & 0xf0000ULL) << 48 | (uint64_t)seg->avl << 52 |
(uint64_t)seg->l << 53 | (uint64_t)seg->db << 54 |
(uint64_t)seg->g << 55 | (seg->base & 0xff000000ULL) << 56;
dt[index] = sd;
}
static void fill_segment_descriptor_dword(uint64_t* dt,
struct kvm_segment* seg)
{
fill_segment_descriptor(dt, seg);
uint16_t index = seg->selector >> 3;
dt[index + 1] = 0;
}
int kvmfd;
static volatile long syz_kvm_setup_cpu(const int vmfd, const int cpufd)
{
char *const host_mem = mmap(NULL, /*len=*/0x19000ul,
PROT_WRITE|PROT_READ, MAP_ANON|MAP_PRIVATE, -1, 0);
const uintptr_t page_size = 4 << 10;
const uintptr_t guest_mem_size = 24 * page_size;
struct kvm_userspace_memory_region memreg;
memreg.slot = 0;
memreg.flags = 0;
memreg.guest_phys_addr = 0;
memreg.memory_size = guest_mem_size;
memreg.userspace_addr = (uintptr_t)host_mem;
ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg);
struct kvm_segment seg_cs32;
memset(&seg_cs32, 0, sizeof(seg_cs32));
seg_cs32.selector = SEL_CS32;
seg_cs32.type = 11;
seg_cs32.limit = 0xfffff;
seg_cs32.present = 1;
seg_cs32.s = 1;
seg_cs32.db = 1;
struct kvm_segment seg_ds32;
memset(&seg_ds32, 0, sizeof(seg_ds32));
seg_ds32.selector = SEL_DS32;
seg_ds32.type = 3;
seg_ds32.limit = 0xfffff;
seg_ds32.present = 1;
seg_ds32.s = 1;
seg_ds32.db = 1;
struct kvm_segment seg_tss32;
memset(&seg_tss32, 0, sizeof(seg_tss32));
seg_tss32.selector = SEL_TSS32;
seg_tss32.type = 11;
seg_tss32.base = ADDR_VAR_TSS32;
seg_tss32.limit = 0x1ff;
seg_tss32.present = 1;
struct kvm_segment seg_cs64 = seg_cs32;
seg_cs64.selector = SEL_CS64;
seg_cs64.db = 0;
seg_cs64.l = 1;
struct kvm_segment seg_ds64 = seg_ds32;
seg_ds64.selector = SEL_DS64;
struct kvm_segment seg_tss64 = seg_tss32;
seg_tss64.selector = SEL_TSS64;
seg_tss64.base = ADDR_VAR_TSS64;
seg_tss64.type = 9;
char buf[sizeof(struct kvm_cpuid2) + 128 * sizeof(struct kvm_cpuid_entry2)];
memset(buf, 0, sizeof(buf));
struct kvm_cpuid2* cpuid = (struct kvm_cpuid2*)buf;
cpuid->nent = 128;
ioctl(kvmfd, KVM_GET_SUPPORTED_CPUID, cpuid);
ioctl(cpufd, KVM_SET_CPUID2, cpuid);
uint64_t* pml4 = (uint64_t*)(host_mem + ADDR_PML4);
pml4[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | ADDR_PDP;
uint64_t* pdpt = (uint64_t*)(host_mem + ADDR_PDP);
pdpt[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | ADDR_PD;
uint64_t* pd = (uint64_t*)(host_mem + ADDR_PD);
pd[0] = PDE64_PRESENT | PDE64_RW | PDE64_USER | PDE64_PS;
*((uint64_t*)(host_mem + ADDR_VAR_VMXON_PTR)) = ADDR_VAR_VMXON;
*((uint64_t*)(host_mem + ADDR_VAR_VMCS_PTR)) = ADDR_VAR_VMCS;
struct tss32 tss32;
memset(&tss32, 0, sizeof(tss32));
struct tss32* tss32_addr = (struct tss32*)(host_mem + ADDR_VAR_TSS32);
memcpy(tss32_addr, &tss32, sizeof(tss32));
struct tss64 tss64;
memset(&tss64, 0, sizeof(tss64));
tss64.rsp[0] = ADDR_STACK0;
tss64.rsp[1] = ADDR_STACK0;
tss64.rsp[2] = ADDR_STACK0;
tss64.io_bitmap = offsetof(struct tss64, io_bitmap);
struct tss64* tss64_addr = (struct tss64*)(host_mem + ADDR_VAR_TSS64);
memcpy(tss64_addr, &tss64, sizeof(tss64));
memcpy(host_mem + ADDR_TEXT,
kvm_asm64_init_vm, sizeof(kvm_asm64_init_vm) - 1);
memcpy(host_mem + ADDR_VAR_VMEXIT_CODE, kvm_asm64_vm_exit,
sizeof(kvm_asm64_vm_exit) - 1);
*((uint64_t*)(host_mem + ADDR_VAR_VMEXIT_PTR)) = ADDR_VAR_VMEXIT_CODE;
*(host_mem + ADDR_VAR_USER_CODE) = 0xf4;
*(host_mem + ADDR_VAR_HLT) = 0xf4;
memcpy(host_mem + ADDR_VAR_SYSRET, "\x0f\x07\xf4", 3);
memcpy(host_mem + ADDR_VAR_SYSEXIT, "\x0f\x35\xf4", 3);
*(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_FLD) = 0;
*(uint64_t*)(host_mem + ADDR_VAR_VMWRITE_VAL) = 0;
uint64_t* gdt = (uint64_t*)(host_mem + ADDR_GDT);
fill_segment_descriptor(gdt, &seg_cs32);
fill_segment_descriptor(gdt, &seg_ds32);
fill_segment_descriptor(gdt, &seg_tss32);
fill_segment_descriptor(gdt, &seg_cs64);
fill_segment_descriptor(gdt, &seg_ds64);
fill_segment_descriptor_dword(gdt, &seg_tss64);
struct kvm_sregs sregs;
memset(&sregs, 0, sizeof (sregs));
sregs.cs = seg_cs32;
sregs.ds = sregs.es = sregs.fs = sregs.gs = sregs.ss = seg_ds32;
sregs.tr = seg_tss32;
sregs.ldt.unusable = 1;
sregs.cr0 = CR0_PE | CR0_NE;
sregs.cr3 = ADDR_PML4;
sregs.cr4 = CR4_PAE;
sregs.efer = EFER_LME | EFER_SCE;
sregs.gdt.base = ADDR_GDT;
sregs.gdt.limit = 256 * sizeof(uint64_t) - 1;
sregs.idt.base = 0;
sregs.idt.limit = 0;
if (ioctl(cpufd, KVM_SET_SREGS, &sregs))
return -1;
struct kvm_regs regs;
memset(®s, 0, sizeof(regs));
regs.rip = ADDR_TEXT;
regs.rsp = ADDR_STACK0;
regs.rflags = 2;
if (ioctl(cpufd, KVM_SET_REGS, ®s))
return -1;
return 0;
}
static void do_set_regs(int vcpufd)
{
struct kvm_regs regs;
ioctl(vcpufd, KVM_GET_REGS, ®s);
regs.rflags = 0x20002;
ioctl(vcpufd, KVM_SET_REGS, ®s);
}
int main(int argc, char **argv)
{
int i;
kvmfd = open("/dev/kvm", 0);
int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
int vcpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
// mmap(&(0x7f0000000000/0x3000)=nil, 0x3000, 0x2, 0x13, r2, 0x0)
struct kvm_run *run = mmap(NULL, 0x3000, PROT_READ|PROT_WRITE, MAP_SHARED, vcpufd, 0);
// ioctl$KVM_REGISTER_COALESCED_MMIO(0xffffffffffffffff, 0x4010ae67, &(0x7f0000000000)={0x2})
// invalid vcpufd, but 0x2 is written to the mmap-ed area above!
run->request_interrupt_window = 1;
// syz_kvm_setup_cpu$x86(r1, r2, &(0x7f0000fe7000/0x18000)=nil, &(0x7f0000000340)=[@text64={0x40, 0x0}], 0x1, 0x50, 0x0, 0x0)
syz_kvm_setup_cpu(vmfd, vcpufd);
// ioctl$KVM_RUN(r2, 0xae80, 0x0) (rerun: 64)
for (i = 0; i < atoi(argv[1]); i++)
ioctl(vcpufd, KVM_RUN, 0);
// ioctl$KVM_SET_REGS(r2, 0x4090ae82, &(0x7f00000000c0)={[0x7], 0x0, 0x60000})
// interleaved with KVM_RUN
do_set_regs(vcpufd);
// one more KVM_RUN to show the bug
ioctl(vcpufd, KVM_RUN, 0);
return 0;
}