The patch titled KVM: move the vmx exit handlers to vmx.c has been added to the -mm tree. Its filename is kvm-move-the-vmx-exit-handlers-to-vmxc.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: KVM: move the vmx exit handlers to vmx.c From: Avi Kivity <avi@xxxxxxxxxxxx> Signed-off-by: Avi Kivity <avi@xxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- drivers/kvm/kvm.h | 23 +- drivers/kvm/kvm_main.c | 448 +-------------------------------------- drivers/kvm/vmx.c | 411 +++++++++++++++++++++++++++++++++++ 3 files changed, 448 insertions(+), 434 deletions(-) diff -puN drivers/kvm/kvm.h~kvm-move-the-vmx-exit-handlers-to-vmxc drivers/kvm/kvm.h --- a/drivers/kvm/kvm.h~kvm-move-the-vmx-exit-handlers-to-vmxc +++ a/drivers/kvm/kvm.h @@ -266,6 +266,7 @@ struct kvm_arch_ops { void (*decache_regs)(struct kvm_vcpu *vcpu); int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); + void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); unsigned long vmx_return; /* temporary hack */ }; @@ -300,6 +301,14 @@ static inline struct page *gfn_to_page(s struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); +enum emulation_result { + EMULATE_DONE, /* no further processing */ + EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ + EMULATE_FAIL, /* can't emulate this instruction */ +}; + +int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long cr2, u16 error_code); void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, @@ -309,10 +318,22 @@ unsigned long realmode_get_cr(struct kvm void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, unsigned long *rflags); +void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); +void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); +void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); +void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); + +void inject_gp(struct kvm_vcpu *vcpu); + +#ifdef __x86_64__ +void set_efer(struct kvm_vcpu *vcpu, u64 efer); +#endif + + void load_msrs(struct vmx_msr_entry *e, int n); void save_msrs(struct vmx_msr_entry *e, int n); void kvm_resched(struct kvm_vcpu *vcpu); -int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, diff -puN drivers/kvm/kvm_main.c~kvm-move-the-vmx-exit-handlers-to-vmxc drivers/kvm/kvm_main.c --- a/drivers/kvm/kvm_main.c~kvm-move-the-vmx-exit-handlers-to-vmxc +++ a/drivers/kvm/kvm_main.c @@ -533,7 +533,7 @@ void vmcs_writel(unsigned long field, un } EXPORT_SYMBOL_GPL(vmcs_writel); -static void inject_gp(struct kvm_vcpu *vcpu) +void inject_gp(struct kvm_vcpu *vcpu) { printk(KERN_DEBUG "inject_general_protection: rip 0x%lx\n", vmcs_readl(GUEST_RIP)); @@ -544,6 +544,7 @@ static void inject_gp(struct kvm_vcpu *v INTR_INFO_DELIEVER_CODE_MASK | INTR_INFO_VALID_MASK); } +EXPORT_SYMBOL_GPL(inject_gp); /* * reads and returns guest's timestamp counter "register" @@ -821,7 +822,7 @@ static int pdptrs_have_reserved_bits_set return i != 4; } -static void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { if (cr0 & CR0_RESEVED_BITS) { printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", @@ -879,8 +880,9 @@ static void set_cr0(struct kvm_vcpu *vcp spin_unlock(&vcpu->kvm->lock); return; } +EXPORT_SYMBOL_GPL(set_cr0); -static void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) +void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) { unsigned long cr0 = vcpu->cr0; @@ -895,6 +897,7 @@ static void lmsw(struct kvm_vcpu *vcpu, | (msw & LMSW_GUEST_MASK)); vcpu->cr0 = (vcpu->cr0 & ~0xfffful) | msw; } +EXPORT_SYMBOL_GPL(lmsw); static void __set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { @@ -904,7 +907,7 @@ static void __set_cr4(struct kvm_vcpu *v vcpu->cr4 = cr4; } -static void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { if (cr4 & CR4_RESEVED_BITS) { printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); @@ -935,8 +938,9 @@ static void set_cr4(struct kvm_vcpu *vcp kvm_mmu_reset_context(vcpu); spin_unlock(&vcpu->kvm->lock); } +EXPORT_SYMBOL_GPL(set_cr4); -static void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) +void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (is_long_mode()) { if ( cr3 & CR3_L_MODE_RESEVED_BITS) { @@ -964,8 +968,9 @@ static void set_cr3(struct kvm_vcpu *vcp vcpu->mmu.new_cr3(vcpu); spin_unlock(&vcpu->kvm->lock); } +EXPORT_SYMBOL_GPL(set_cr3); -static void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) +void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { if ( cr8 & CR8_RESEVED_BITS) { printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); @@ -974,6 +979,7 @@ static void set_cr8(struct kvm_vcpu *vcp } vcpu->cr8 = cr8; } +EXPORT_SYMBOL_GPL(set_cr8); static u32 get_rdx_init_val(void) { @@ -1534,25 +1540,6 @@ void mark_page_dirty(struct kvm *kvm, gf } } -static void skip_emulated_instruction(struct kvm_vcpu *vcpu) -{ - unsigned long rip; - u32 interruptibility; - - rip = vmcs_readl(GUEST_RIP); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - vmcs_writel(GUEST_RIP, rip); - - /* - * We emulated an instruction, so temporary interrupt blocking - * should be removed, if set. - */ - interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - if (interruptibility & 3) - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, - interruptibility & ~3); -} - static int emulator_read_std(unsigned long addr, unsigned long *val, unsigned int bytes, @@ -1694,16 +1681,10 @@ struct x86_emulate_ops emulate_ops = { .cmpxchg_emulated = emulator_cmpxchg_emulated, }; -enum emulation_result { - EMULATE_DONE, /* no further processing */ - EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ - EMULATE_FAIL, /* can't emulate this instruction */ -}; - -static int emulate_instruction(struct kvm_vcpu *vcpu, - struct kvm_run *run, - unsigned long cr2, - u16 error_code) +int emulate_instruction(struct kvm_vcpu *vcpu, + struct kvm_run *run, + unsigned long cr2, + u16 error_code) { struct x86_emulate_ctxt emulate_ctxt; int r; @@ -1762,6 +1743,7 @@ static int emulate_instruction(struct kv return EMULATE_DONE; } +EXPORT_SYMBOL_GPL(emulate_instruction); static u64 mk_cr_64(u64 curr_cr, u32 new_val) { @@ -1826,298 +1808,6 @@ void realmode_set_cr(struct kvm_vcpu *vc } } -static int handle_rmode_exception(struct kvm_vcpu *vcpu, - int vec, u32 err_code) -{ - if (!vcpu->rmode.active) - return 0; - - if (vec == GP_VECTOR && err_code == 0) - if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) - return 1; - return 0; -} - -static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - u32 intr_info, error_code; - unsigned long cr2, rip; - u32 vect_info; - enum emulation_result er; - - vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - - if ((vect_info & VECTORING_INFO_VALID_MASK) && - !is_page_fault(intr_info)) { - printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " - "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); - } - - if (is_external_interrupt(vect_info)) { - int irq = vect_info & VECTORING_INFO_VECTOR_MASK; - set_bit(irq, vcpu->irq_pending); - set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); - } - - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ - asm ("int $2"); - return 1; - } - error_code = 0; - rip = vmcs_readl(GUEST_RIP); - if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) - error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - if (is_page_fault(intr_info)) { - cr2 = vmcs_readl(EXIT_QUALIFICATION); - - spin_lock(&vcpu->kvm->lock); - if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) { - spin_unlock(&vcpu->kvm->lock); - return 1; - } - - er = emulate_instruction(vcpu, kvm_run, cr2, error_code); - spin_unlock(&vcpu->kvm->lock); - - switch (er) { - case EMULATE_DONE: - return 1; - case EMULATE_DO_MMIO: - ++kvm_stat.mmio_exits; - kvm_run->exit_reason = KVM_EXIT_MMIO; - return 0; - case EMULATE_FAIL: - vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); - break; - default: - BUG(); - } - } - - if (vcpu->rmode.active && - handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, - error_code)) - return 1; - - if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) { - kvm_run->exit_reason = KVM_EXIT_DEBUG; - return 0; - } - kvm_run->exit_reason = KVM_EXIT_EXCEPTION; - kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK; - kvm_run->ex.error_code = error_code; - return 0; -} - -static int handle_external_interrupt(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - ++kvm_stat.irq_exits; - return 1; -} - - -static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) -{ - u64 inst; - gva_t rip; - int countr_size; - int i, n; - - if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_VM)) { - countr_size = 2; - } else { - u32 cs_ar = vmcs_read32(GUEST_CS_AR_BYTES); - - countr_size = (cs_ar & AR_L_MASK) ? 8: - (cs_ar & AR_DB_MASK) ? 4: 2; - } - - rip = vmcs_readl(GUEST_RIP); - if (countr_size != 8) - rip += vmcs_readl(GUEST_CS_BASE); - - n = kvm_read_guest(vcpu, rip, sizeof(inst), &inst); - - for (i = 0; i < n; i++) { - switch (((u8*)&inst)[i]) { - case 0xf0: - case 0xf2: - case 0xf3: - case 0x2e: - case 0x36: - case 0x3e: - case 0x26: - case 0x64: - case 0x65: - case 0x66: - break; - case 0x67: - countr_size = (countr_size == 2) ? 4: (countr_size >> 1); - default: - goto done; - } - } - return 0; -done: - countr_size *= 8; - *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); - return 1; -} - -static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - u64 exit_qualification; - - ++kvm_stat.io_exits; - exit_qualification = vmcs_read64(EXIT_QUALIFICATION); - kvm_run->exit_reason = KVM_EXIT_IO; - if (exit_qualification & 8) - kvm_run->io.direction = KVM_EXIT_IO_IN; - else - kvm_run->io.direction = KVM_EXIT_IO_OUT; - kvm_run->io.size = (exit_qualification & 7) + 1; - kvm_run->io.string = (exit_qualification & 16) != 0; - kvm_run->io.string_down - = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; - kvm_run->io.rep = (exit_qualification & 32) != 0; - kvm_run->io.port = exit_qualification >> 16; - if (kvm_run->io.string) { - if (!get_io_count(vcpu, &kvm_run->io.count)) - return 1; - kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); - } else - kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ - return 0; -} - -static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - u64 address = vmcs_read64(EXIT_QUALIFICATION); - int instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - spin_lock(&vcpu->kvm->lock); - vcpu->mmu.inval_page(vcpu, address); - spin_unlock(&vcpu->kvm->lock); - vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) + instruction_length); - return 1; -} - -static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - u64 exit_qualification; - int cr; - int reg; - -#ifdef KVM_DEBUG - if (guest_cpl() != 0) { - vcpu_printf(vcpu, "%s: not supervisor\n", __FUNCTION__); - inject_gp(vcpu); - return 1; - } -#endif - - exit_qualification = vmcs_read64(EXIT_QUALIFICATION); - cr = exit_qualification & 15; - reg = (exit_qualification >> 8) & 15; - switch ((exit_qualification >> 4) & 3) { - case 0: /* mov to cr */ - switch (cr) { - case 0: - kvm_arch_ops->cache_regs(vcpu); - set_cr0(vcpu, vcpu->regs[reg]); - skip_emulated_instruction(vcpu); - return 1; - case 3: - kvm_arch_ops->cache_regs(vcpu); - set_cr3(vcpu, vcpu->regs[reg]); - skip_emulated_instruction(vcpu); - return 1; - case 4: - kvm_arch_ops->cache_regs(vcpu); - set_cr4(vcpu, vcpu->regs[reg]); - skip_emulated_instruction(vcpu); - return 1; - case 8: - kvm_arch_ops->cache_regs(vcpu); - set_cr8(vcpu, vcpu->regs[reg]); - skip_emulated_instruction(vcpu); - return 1; - }; - break; - case 1: /*mov from cr*/ - switch (cr) { - case 3: - kvm_arch_ops->cache_regs(vcpu); - vcpu->regs[reg] = vcpu->cr3; - kvm_arch_ops->decache_regs(vcpu); - skip_emulated_instruction(vcpu); - return 1; - case 8: - printk(KERN_DEBUG "handle_cr: read CR8 " - "cpu erratum AA15\n"); - kvm_arch_ops->cache_regs(vcpu); - vcpu->regs[reg] = vcpu->cr8; - kvm_arch_ops->decache_regs(vcpu); - skip_emulated_instruction(vcpu); - return 1; - } - break; - case 3: /* lmsw */ - lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); - - skip_emulated_instruction(vcpu); - return 1; - default: - break; - } - kvm_run->exit_reason = 0; - printk(KERN_ERR "kvm: unhandled control register: op %d cr %d\n", - (int)(exit_qualification >> 4) & 3, cr); - return 0; -} - -static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - u64 exit_qualification; - unsigned long val; - int dr, reg; - - /* - * FIXME: this code assumes the host is debugging the guest. - * need to deal with guest debugging itself too. - */ - exit_qualification = vmcs_read64(EXIT_QUALIFICATION); - dr = exit_qualification & 7; - reg = (exit_qualification >> 8) & 15; - kvm_arch_ops->cache_regs(vcpu); - if (exit_qualification & 16) { - /* mov from dr */ - switch (dr) { - case 6: - val = 0xffff0ff0; - break; - case 7: - val = 0x400; - break; - default: - val = 0; - } - vcpu->regs[reg] = val; - } else { - /* mov to dr */ - } - kvm_arch_ops->decache_regs(vcpu); - skip_emulated_instruction(vcpu); - return 1; -} - -static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - kvm_run->exit_reason = KVM_EXIT_CPUID; - return 0; -} - /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -2128,23 +1818,6 @@ static int get_msr(struct kvm_vcpu *vcpu return kvm_arch_ops->get_msr(vcpu, msr_index, pdata); } -static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - u32 ecx = vcpu->regs[VCPU_REGS_RCX]; - u64 data; - - if (get_msr(vcpu, ecx, &data)) { - inject_gp(vcpu); - return 1; - } - - /* FIXME: handling of bits 32:63 of rax, rdx */ - vcpu->regs[VCPU_REGS_RAX] = data & -1u; - vcpu->regs[VCPU_REGS_RDX] = (data >> 32) & -1u; - skip_emulated_instruction(vcpu); - return 1; -} - #ifdef __x86_64__ void set_efer(struct kvm_vcpu *vcpu, u64 efer) @@ -2175,7 +1848,6 @@ void set_efer(struct kvm_vcpu *vcpu, u64 if (!(efer & EFER_LMA)) efer &= ~EFER_LME; msr->data = efer; - skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_GPL(set_efer); @@ -2191,90 +1863,6 @@ static int set_msr(struct kvm_vcpu *vcpu return kvm_arch_ops->set_msr(vcpu, msr_index, data); } -static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - u32 ecx = vcpu->regs[VCPU_REGS_RCX]; - u64 data = (vcpu->regs[VCPU_REGS_RAX] & -1u) - | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); - - if (set_msr(vcpu, ecx, data) != 0) { - inject_gp(vcpu); - return 1; - } - - if (ecx != MSR_EFER) - skip_emulated_instruction(vcpu); - return 1; -} - -static int handle_interrupt_window(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - /* Turn off interrupt window reporting. */ - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) - & ~CPU_BASED_VIRTUAL_INTR_PENDING); - return 1; -} - -static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - skip_emulated_instruction(vcpu); - if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)) - return 1; - - kvm_run->exit_reason = KVM_EXIT_HLT; - return 0; -} - -/* - * The exit handlers return 1 if the exit was handled fully and guest execution - * may resume. Otherwise they set the kvm_run parameter to indicate what needs - * to be done to userspace and return 0. - */ -static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) = { - [EXIT_REASON_EXCEPTION_NMI] = handle_exception, - [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, - [EXIT_REASON_IO_INSTRUCTION] = handle_io, - [EXIT_REASON_INVLPG] = handle_invlpg, - [EXIT_REASON_CR_ACCESS] = handle_cr, - [EXIT_REASON_DR_ACCESS] = handle_dr, - [EXIT_REASON_CPUID] = handle_cpuid, - [EXIT_REASON_MSR_READ] = handle_rdmsr, - [EXIT_REASON_MSR_WRITE] = handle_wrmsr, - [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, - [EXIT_REASON_HLT] = handle_halt, -}; - -static const int kvm_vmx_max_exit_handlers = - sizeof(kvm_vmx_exit_handlers) / sizeof(*kvm_vmx_exit_handlers); - -/* - * The guest has exited. See if we can fix it or if we need userspace - * assistance. - */ -int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) -{ - u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - u32 exit_reason = vmcs_read32(VM_EXIT_REASON); - - if ( (vectoring_info & VECTORING_INFO_VALID_MASK) && - exit_reason != EXIT_REASON_EXCEPTION_NMI ) - printk(KERN_WARNING "%s: unexpected, valid vectoring info and " - "exit reason is 0x%x\n", __FUNCTION__, exit_reason); - kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - if (exit_reason < kvm_vmx_max_exit_handlers - && kvm_vmx_exit_handlers[exit_reason]) - return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); - else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = exit_reason; - } - return 0; -} -EXPORT_SYMBOL_GPL(kvm_handle_exit); - void kvm_resched(struct kvm_vcpu *vcpu) { vcpu_put(vcpu); @@ -2315,7 +1903,7 @@ static int kvm_dev_ioctl_run(struct kvm return -ENOENT; if (kvm_run->emulated) { - skip_emulated_instruction(vcpu); + kvm_arch_ops->skip_emulated_instruction(vcpu); kvm_run->emulated = 0; } diff -puN drivers/kvm/vmx.c~kvm-move-the-vmx-exit-handlers-to-vmxc drivers/kvm/vmx.c --- a/drivers/kvm/vmx.c~kvm-move-the-vmx-exit-handlers-to-vmxc +++ a/drivers/kvm/vmx.c @@ -44,11 +44,24 @@ u64 guest_read_tsc(void); void guest_write_tsc(u64 guest_tsc); struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr); -#ifdef __x86_64__ +static void skip_emulated_instruction(struct kvm_vcpu *vcpu) +{ + unsigned long rip; + u32 interruptibility; -void set_efer(struct kvm_vcpu *vcpu, u64 efer); + rip = vmcs_readl(GUEST_RIP); + rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + vmcs_writel(GUEST_RIP, rip); -#endif + /* + * We emulated an instruction, so temporary interrupt blocking + * should be removed, if set. + */ + interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + if (interruptibility & 3) + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, + interruptibility & ~3); +} static void reload_tss(void) { @@ -521,6 +534,397 @@ static void kvm_guest_debug_pre(struct k } } +static int handle_rmode_exception(struct kvm_vcpu *vcpu, + int vec, u32 err_code) +{ + if (!vcpu->rmode.active) + return 0; + + if (vec == GP_VECTOR && err_code == 0) + if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) + return 1; + return 0; +} + +static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 intr_info, error_code; + unsigned long cr2, rip; + u32 vect_info; + enum emulation_result er; + + vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); + intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + if ((vect_info & VECTORING_INFO_VALID_MASK) && + !is_page_fault(intr_info)) { + printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " + "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); + } + + if (is_external_interrupt(vect_info)) { + int irq = vect_info & VECTORING_INFO_VECTOR_MASK; + set_bit(irq, vcpu->irq_pending); + set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); + } + + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ + asm ("int $2"); + return 1; + } + error_code = 0; + rip = vmcs_readl(GUEST_RIP); + if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) + error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + if (is_page_fault(intr_info)) { + cr2 = vmcs_readl(EXIT_QUALIFICATION); + + spin_lock(&vcpu->kvm->lock); + if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) { + spin_unlock(&vcpu->kvm->lock); + return 1; + } + + er = emulate_instruction(vcpu, kvm_run, cr2, error_code); + spin_unlock(&vcpu->kvm->lock); + + switch (er) { + case EMULATE_DONE: + return 1; + case EMULATE_DO_MMIO: + ++kvm_stat.mmio_exits; + kvm_run->exit_reason = KVM_EXIT_MMIO; + return 0; + case EMULATE_FAIL: + vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); + break; + default: + BUG(); + } + } + + if (vcpu->rmode.active && + handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, + error_code)) + return 1; + + if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) { + kvm_run->exit_reason = KVM_EXIT_DEBUG; + return 0; + } + kvm_run->exit_reason = KVM_EXIT_EXCEPTION; + kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK; + kvm_run->ex.error_code = error_code; + return 0; +} + +static int handle_external_interrupt(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + ++kvm_stat.irq_exits; + return 1; +} + + +static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) +{ + u64 inst; + gva_t rip; + int countr_size; + int i, n; + + if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_VM)) { + countr_size = 2; + } else { + u32 cs_ar = vmcs_read32(GUEST_CS_AR_BYTES); + + countr_size = (cs_ar & AR_L_MASK) ? 8: + (cs_ar & AR_DB_MASK) ? 4: 2; + } + + rip = vmcs_readl(GUEST_RIP); + if (countr_size != 8) + rip += vmcs_readl(GUEST_CS_BASE); + + n = kvm_read_guest(vcpu, rip, sizeof(inst), &inst); + + for (i = 0; i < n; i++) { + switch (((u8*)&inst)[i]) { + case 0xf0: + case 0xf2: + case 0xf3: + case 0x2e: + case 0x36: + case 0x3e: + case 0x26: + case 0x64: + case 0x65: + case 0x66: + break; + case 0x67: + countr_size = (countr_size == 2) ? 4: (countr_size >> 1); + default: + goto done; + } + } + return 0; +done: + countr_size *= 8; + *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); + return 1; +} + +static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 exit_qualification; + + ++kvm_stat.io_exits; + exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + kvm_run->exit_reason = KVM_EXIT_IO; + if (exit_qualification & 8) + kvm_run->io.direction = KVM_EXIT_IO_IN; + else + kvm_run->io.direction = KVM_EXIT_IO_OUT; + kvm_run->io.size = (exit_qualification & 7) + 1; + kvm_run->io.string = (exit_qualification & 16) != 0; + kvm_run->io.string_down + = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; + kvm_run->io.rep = (exit_qualification & 32) != 0; + kvm_run->io.port = exit_qualification >> 16; + if (kvm_run->io.string) { + if (!get_io_count(vcpu, &kvm_run->io.count)) + return 1; + kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); + } else + kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ + return 0; +} + +static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 address = vmcs_read64(EXIT_QUALIFICATION); + int instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + spin_lock(&vcpu->kvm->lock); + vcpu->mmu.inval_page(vcpu, address); + spin_unlock(&vcpu->kvm->lock); + vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) + instruction_length); + return 1; +} + +static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 exit_qualification; + int cr; + int reg; + +#ifdef KVM_DEBUG + if (guest_cpl() != 0) { + vcpu_printf(vcpu, "%s: not supervisor\n", __FUNCTION__); + inject_gp(vcpu); + return 1; + } +#endif + + exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + cr = exit_qualification & 15; + reg = (exit_qualification >> 8) & 15; + switch ((exit_qualification >> 4) & 3) { + case 0: /* mov to cr */ + switch (cr) { + case 0: + vcpu_load_rsp_rip(vcpu); + set_cr0(vcpu, vcpu->regs[reg]); + skip_emulated_instruction(vcpu); + return 1; + case 3: + vcpu_load_rsp_rip(vcpu); + set_cr3(vcpu, vcpu->regs[reg]); + skip_emulated_instruction(vcpu); + return 1; + case 4: + vcpu_load_rsp_rip(vcpu); + set_cr4(vcpu, vcpu->regs[reg]); + skip_emulated_instruction(vcpu); + return 1; + case 8: + vcpu_load_rsp_rip(vcpu); + set_cr8(vcpu, vcpu->regs[reg]); + skip_emulated_instruction(vcpu); + return 1; + }; + break; + case 1: /*mov from cr*/ + switch (cr) { + case 3: + vcpu_load_rsp_rip(vcpu); + vcpu->regs[reg] = vcpu->cr3; + vcpu_put_rsp_rip(vcpu); + skip_emulated_instruction(vcpu); + return 1; + case 8: + printk(KERN_DEBUG "handle_cr: read CR8 " + "cpu erratum AA15\n"); + vcpu_load_rsp_rip(vcpu); + vcpu->regs[reg] = vcpu->cr8; + vcpu_put_rsp_rip(vcpu); + skip_emulated_instruction(vcpu); + return 1; + } + break; + case 3: /* lmsw */ + lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); + + skip_emulated_instruction(vcpu); + return 1; + default: + break; + } + kvm_run->exit_reason = 0; + printk(KERN_ERR "kvm: unhandled control register: op %d cr %d\n", + (int)(exit_qualification >> 4) & 3, cr); + return 0; +} + +static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u64 exit_qualification; + unsigned long val; + int dr, reg; + + /* + * FIXME: this code assumes the host is debugging the guest. + * need to deal with guest debugging itself too. + */ + exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + dr = exit_qualification & 7; + reg = (exit_qualification >> 8) & 15; + vcpu_load_rsp_rip(vcpu); + if (exit_qualification & 16) { + /* mov from dr */ + switch (dr) { + case 6: + val = 0xffff0ff0; + break; + case 7: + val = 0x400; + break; + default: + val = 0; + } + vcpu->regs[reg] = val; + } else { + /* mov to dr */ + } + vcpu_put_rsp_rip(vcpu); + skip_emulated_instruction(vcpu); + return 1; +} + +static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_CPUID; + return 0; +} + +static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 ecx = vcpu->regs[VCPU_REGS_RCX]; + u64 data; + + if (vmx_get_msr(vcpu, ecx, &data)) { + inject_gp(vcpu); + return 1; + } + + /* FIXME: handling of bits 32:63 of rax, rdx */ + vcpu->regs[VCPU_REGS_RAX] = data & -1u; + vcpu->regs[VCPU_REGS_RDX] = (data >> 32) & -1u; + skip_emulated_instruction(vcpu); + return 1; +} + +static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 ecx = vcpu->regs[VCPU_REGS_RCX]; + u64 data = (vcpu->regs[VCPU_REGS_RAX] & -1u) + | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); + + if (vmx_set_msr(vcpu, ecx, data) != 0) { + inject_gp(vcpu); + return 1; + } + + skip_emulated_instruction(vcpu); + return 1; +} + +static int handle_interrupt_window(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + /* Turn off interrupt window reporting. */ + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, + vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) + & ~CPU_BASED_VIRTUAL_INTR_PENDING); + return 1; +} + +static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + skip_emulated_instruction(vcpu); + if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)) + return 1; + + kvm_run->exit_reason = KVM_EXIT_HLT; + return 0; +} + +/* + * The exit handlers return 1 if the exit was handled fully and guest execution + * may resume. Otherwise they set the kvm_run parameter to indicate what needs + * to be done to userspace and return 0. + */ +static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) = { + [EXIT_REASON_EXCEPTION_NMI] = handle_exception, + [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, + [EXIT_REASON_IO_INSTRUCTION] = handle_io, + [EXIT_REASON_INVLPG] = handle_invlpg, + [EXIT_REASON_CR_ACCESS] = handle_cr, + [EXIT_REASON_DR_ACCESS] = handle_dr, + [EXIT_REASON_CPUID] = handle_cpuid, + [EXIT_REASON_MSR_READ] = handle_rdmsr, + [EXIT_REASON_MSR_WRITE] = handle_wrmsr, + [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, + [EXIT_REASON_HLT] = handle_halt, +}; + +static const int kvm_vmx_max_exit_handlers = + sizeof(kvm_vmx_exit_handlers) / sizeof(*kvm_vmx_exit_handlers); + +/* + * The guest has exited. See if we can fix it or if we need userspace + * assistance. + */ +static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); + u32 exit_reason = vmcs_read32(VM_EXIT_REASON); + + if ( (vectoring_info & VECTORING_INFO_VALID_MASK) && + exit_reason != EXIT_REASON_EXCEPTION_NMI ) + printk(KERN_WARNING "%s: unexpected, valid vectoring info and " + "exit reason is 0x%x\n", __FUNCTION__, exit_reason); + kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + if (exit_reason < kvm_vmx_max_exit_handlers + && kvm_vmx_exit_handlers[exit_reason]) + return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = exit_reason; + } + return 0; +} + static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u8 fail; @@ -753,6 +1157,7 @@ static struct kvm_arch_ops vmx_arch_ops .decache_regs = vcpu_put_rsp_rip, .run = vmx_vcpu_run, + .skip_emulated_instruction = skip_emulated_instruction, .vmx_return = (unsigned long)kvm_vmx_return, }; _ Patches currently in -mm which might be from avi@xxxxxxxxxxxx are kvm-userspace-interface.patch kvm-userspace-interface-make-enum-values-in-userspace-interface-explicit.patch kvm-intel-virtual-mode-extensions-definitions.patch kvm-kvm-data-structures.patch kvm-random-accessors-and-constants.patch kvm-virtualization-infrastructure.patch kvm-virtualization-infrastructure-kvm-fix-guest-cr4-corruption.patch kvm-virtualization-infrastructure-include-desch.patch kvm-virtualization-infrastructure-fix-segment-state-changes-across-processor-mode-switches.patch kvm-virtualization-infrastructure-fix-asm-constraints-for-segment-loads.patch kvm-virtualization-infrastructure-fix-mmu-reset-locking-when-setting-cr0.patch kvm-memory-slot-management.patch kvm-vcpu-creation-and-maintenance.patch kvm-vcpu-creation-and-maintenance-segment-access-cleanup.patch kvm-workaround-cr0cd-cache-disable-bit-leak-from-guest-to.patch kvm-vcpu-execution-loop.patch kvm-define-exit-handlers.patch kvm-define-exit-handlers-pass-fs-gs-segment-bases-to-x86-emulator.patch kvm-less-common-exit-handlers.patch kvm-less-common-exit-handlers-handle-rdmsrmsr_efer.patch kvm-mmu.patch kvm-x86-emulator.patch kvm-clarify-licensing.patch kvm-x86-emulator-fix-emulator-mov-cr-decoding.patch kvm-plumbing.patch kvm-dynamically-determine-which-msrs-to-load-and-save.patch kvm-fix-calculation-of-initial-value-of-rdx-register.patch kvm-avoid-using-vmx-instruction-directly.patch kvm-avoid-using-vmx-instruction-directly-fix-asm-constraints.patch kvm-expose-interrupt-bitmap.patch kvm-add-time-stamp-counter-msr-and-accessors.patch kvm-expose-msrs-to-userspace.patch kvm-expose-msrs-to-userspace-v2.patch kvm-create-kvm-intelko-module.patch kvm-make-dev-registration-happen-when-the-arch.patch kvm-make-hardware-detection-an-arch-operation.patch kvm-make-the-per-cpu-enable-disable-functions-arch.patch kvm-make-the-hardware-setup-operations-non-percpu.patch kvm-make-the-guest-debugger-an-arch-operation.patch kvm-make-msr-accessors-arch-operations.patch kvm-make-the-segment-accessors-arch-operations.patch kvm-cache-guest-cr4-in-vcpu-structure.patch kvm-cache-guest-cr0-in-vcpu-structure.patch kvm-add-get_segment_base-arch-accessor.patch kvm-add-idt-and-gdt-descriptor-accessors.patch kvm-make-syncing-the-register-file-to-the-vcpu.patch kvm-make-the-vcpu-execution-loop-an-arch-operation.patch kvm-move-the-vmx-exit-handlers-to-vmxc.patch kvm-make-vcpu_setup-an-arch-operation.patch kvm-make-__set_cr0-and-dependencies-arch-operations.patch kvm-make-__set_cr4-an-arch-operation.patch kvm-make-__set_efer-an-arch-operation.patch kvm-make-set_cr3-and-tlb-flushing-arch-operations.patch kvm-make-inject_page_fault-an-arch-operation.patch kvm-make-inject_gp-an-arch-operation.patch kvm-use-the-idt-and-gdt-accessors-in-realmode-emulation.patch kvm-use-the-general-purpose-register-accessors-rather.patch kvm-move-the-vmx-tsc-accessors-to-vmxc.patch kvm-access-rflags-through-an-arch-operation.patch kvm-move-the-vmx-segment-field-definitions-to-vmxc.patch kvm-add-an-arch-accessor-for-cs-d-b-and-l-bits.patch kvm-add-a-set_cr0_no_modeswitch-arch-accessor.patch kvm-make-vcpu_load-and-vcpu_put-arch-operations.patch kvm-make-vcpu-creation-and-destruction-arch-operations.patch kvm-move-vmcs-static-variables-to-vmxc.patch kvm-make-is_long_mode-an-arch-operation.patch kvm-use-the-tlb-flush-arch-operation-instead-of-an.patch kvm-remove-guest_cpl.patch kvm-move-vmcs-accessors-to-vmxc.patch kvm-move-vmx-helper-inlines-to-vmxc.patch kvm-remove-vmx-includes-from-arch-independent-code.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html