The patch titled kvm: userspace interface has been added to the -mm tree. Its filename is kvm-userspace-interface.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: kvm: userspace interface From: Avi Kivity <avi@xxxxxxxxxxxx> web site: http://kvm.sourceforge.net mailing list: kvm-devel@xxxxxxxxxxxxxxxxxxxxx (http://lists.sourceforge.net/lists/listinfo/kvm-devel) The following patchset adds a driver for Intel's hardware virtualization extensions to the x86 architecture. The driver adds a character device (/dev/kvm) that exposes the virtualization capabilities to userspace. Using this driver, a process can run a virtual machine (a "guest") in a fully virtualized PC containing its own virtual hard disks, network adapters, and display. Using this driver, one can start multiple virtual machines on a host. Each virtual machine is a process on the host; a virtual cpu is a thread in that process. kill(1), nice(1), top(1) work as expected. In effect, the driver adds a third execution mode to the existing two: we now have kernel mode, user mode, and guest mode. Guest mode has its own address space mapping guest physical memory (which is accessible to user mode by mmap()ing /dev/kvm). Guest mode has no access to any I/O devices; any such access is intercepted and directed to user mode for emulation. The driver supports i386 and x86_64 hosts and guests. All combinations are allowed except x86_64 guest on i386 host. For i386 guests and hosts, both pae and non-pae paging modes are supported. SMP hosts and UP guests are supported. At the moment only Intel hardware is supported, but AMD virtualization support is being worked on. Performance currently is non-stellar due to the naive implementation of the mmu virtualization, which throws away most of the shadow page table entries every context switch. We plan to address this in two ways: - cache shadow page tables across tlb flushes - wait until AMD and Intel release processors with nested page tables Currently a virtual desktop is responsive but consumes a lot of CPU. Under Windows I tried playing pinball and watching a few flash movies; with a recent CPU one can hardly feel the virtualization. Linux/X is slower, probably due to X being in a separate process. In addition to the driver, you need a slightly modified qemu to provide I/O device emulation and the BIOS. Caveats: - The Windows install currently bluescreens due to a problem with the virtual APIC. We are working on a fix. A temporary workaround is to use an existing image or install through qemu - Windows 64-bit does not work. That's also true for qemu, so it's probably a problem with the device model. This patch: Define a bunch of ioctl()s on /dev/kvm. The ioctl()s allow adding memory to a virtual machine, adding a virtual cpu to a virtual machine (at most one at this time), transferring control to the virtual cpu, and querying about guest pages changed by the virtual machine. Still TODO: - inode per vcpu - pseudo filesystem + syscalls? Signed-off-by: Yaniv Kamay <yaniv@xxxxxxxxxxxx> Signed-off-by: Avi Kivity <avi@xxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- include/linux/kvm.h | 198 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff -puN /dev/null include/linux/kvm.h --- /dev/null +++ a/include/linux/kvm.h @@ -0,0 +1,198 @@ +#ifndef __LINUX_KVM_H +#define __LINUX_KVM_H + +/* + * Userspace interface for /dev/kvm - kernel based virtual machine + * + * Note: this interface is considered experimental and may change without + * notice. + */ + +#include <asm/types.h> +#include <linux/ioctl.h> + +/* for KVM_CREATE_MEMORY_REGION */ +struct kvm_memory_region { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ +}; + +/* for kvm_memory_region::flags */ +#define KVM_MEM_LOG_DIRTY_PAGES 1UL + + +#define KVM_EXIT_TYPE_FAIL_ENTRY 1 +#define KVM_EXIT_TYPE_VM_EXIT 2 + +enum kvm_exit_reason { + KVM_EXIT_UNKNOWN, + KVM_EXIT_EXCEPTION, + KVM_EXIT_IO, + KVM_EXIT_CPUID, + KVM_EXIT_DEBUG, + KVM_EXIT_HLT, + KVM_EXIT_MMIO, +}; + +/* for KVM_RUN */ +struct kvm_run { + /* in */ + __u32 vcpu; + __u32 emulated; /* skip current instruction */ + __u32 mmio_completed; /* mmio request completed */ + + /* out */ + __u32 exit_type; + __u32 exit_reason; + __u32 instruction_length; + union { + /* KVM_EXIT_UNKNOWN */ + struct { + __u32 hardware_exit_reason; + } hw; + /* KVM_EXIT_EXCEPTION */ + struct { + __u32 exception; + __u32 error_code; + } ex; + /* KVM_EXIT_IO */ + struct { +#define KVM_EXIT_IO_IN 0 +#define KVM_EXIT_IO_OUT 1 + __u8 direction; + __u8 size; /* bytes */ + __u8 string; + __u8 string_down; + __u8 rep; + __u8 pad; + __u16 port; + __u64 count; + union { + __u64 address; + __u32 value; + }; + } io; + struct { + } debug; + /* KVM_EXIT_MMIO */ + struct { + __u64 phys_addr; + __u8 data[8]; + __u32 len; + __u8 is_write; + } mmio; + }; +}; + +/* for KVM_GET_REGS and KVM_SET_REGS */ +struct kvm_regs { + /* in */ + __u32 vcpu; + __u32 padding; + + /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ + __u64 rax, rbx, rcx, rdx; + __u64 rsi, rdi, rsp, rbp; + __u64 r8, r9, r10, r11; + __u64 r12, r13, r14, r15; + __u64 rip, rflags; +}; + +struct kvm_segment { + __u64 base; + __u32 limit; + __u16 selector; + __u8 type; + __u8 present, dpl, db, s, l, g, avl; + __u8 unusable; + __u8 padding; +}; + +struct kvm_dtable { + __u64 base; + __u16 limit; + __u16 padding[3]; +}; + +/* for KVM_GET_SREGS and KVM_SET_SREGS */ +struct kvm_sregs { + /* in */ + __u32 vcpu; + __u32 padding; + + /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + + /* out (KVM_GET_SREGS) */ + __u32 pending_int; + __u32 padding2; +}; + +/* for KVM_TRANSLATE */ +struct kvm_translation { + /* in */ + __u64 linear_address; + __u32 vcpu; + __u32 padding; + + /* out */ + __u64 physical_address; + __u8 valid; + __u8 writeable; + __u8 usermode; +}; + +/* for KVM_INTERRUPT */ +struct kvm_interrupt { + /* in */ + __u32 vcpu; + __u32 irq; +}; + +struct kvm_breakpoint { + __u32 enabled; + __u32 padding; + __u64 address; +}; + +/* for KVM_DEBUG_GUEST */ +struct kvm_debug_guest { + /* int */ + __u32 vcpu; + __u32 enabled; + struct kvm_breakpoint breakpoints[4]; + __u32 singlestep; +}; + +/* for KVM_GET_DIRTY_LOG */ +struct kvm_dirty_log { + __u32 slot; + __u32 padding; + union { + void __user *dirty_bitmap; /* one bit per page */ + __u64 padding; + }; +}; + +#define KVMIO 0xAE + +#define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run) +#define KVM_GET_REGS _IOWR(KVMIO, 3, struct kvm_regs) +#define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) +#define KVM_GET_SREGS _IOWR(KVMIO, 5, struct kvm_sregs) +#define KVM_SET_SREGS _IOW(KVMIO, 6, struct kvm_sregs) +#define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation) +#define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt) +#define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest) +#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region) +#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int /* vcpu_slot */) +#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) + +#endif _ Patches currently in -mm which might be from avi@xxxxxxxxxxxx are kvm-userspace-interface.patch kvm-intel-virtual-mode-extensions-definitions.patch kvm-kvm-data-structures.patch kvm-random-accessors-and-constants.patch kvm-virtualization-infrastructure.patch kvm-memory-slot-management.patch kvm-vcpu-creation-and-maintenance.patch kvm-vcpu-execution-loop.patch kvm-define-exit-handlers.patch kvm-less-common-exit-handlers.patch kvm-mmu.patch kvm-x86-emulator.patch kvm-plumbing.patch kvm-dynamically-determine-which-msrs-to-load-and-save.patch kvm-fix-calculation-of-initial-value-of-rdx-register.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html