"rep ins" emulation currently goes through the emulator. This is slow because the emulator can write back only one datum at a time. This patch provides a fast path for the instruction under certain conditions: the DF flag is not set, the destination memory is RAM, and no single datum crosses a page boundary. If the fast path fails, it falls back to emulation. Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 6 ++ arch/x86/kvm/svm.c | 20 +++- arch/x86/kvm/vmx.c | 25 +++-- arch/x86/kvm/x86.c | 213 +++++++++++++++++++++++++++++++++------ 4 files changed, 220 insertions(+), 44 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6212575..ecf8430 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -411,6 +411,10 @@ struct kvm_vcpu_arch { /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; + struct x86_fast_string_pio_ctxt { + unsigned long linear_addr; + u8 ad_bytes; + } fast_string_pio_ctxt; bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); @@ -776,6 +780,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); +int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port, + u8 ad_bytes_idx); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7a41878..f3e7bb3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1887,21 +1887,31 @@ static int io_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? 
*/ - int size, in, string; + int size, in, string, rep; unsigned port; ++svm->vcpu.stat.io_exits; string = (io_info & SVM_IOIO_STR_MASK) != 0; + rep = (io_info & SVM_IOIO_REP_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; - if (string || in) - return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; svm->next_rip = svm->vmcb->control.exit_info_2; - skip_emulated_instruction(&svm->vcpu); - return kvm_fast_pio_out(vcpu, size, port); + if (!string && !in) { + skip_emulated_instruction(&svm->vcpu); + return kvm_fast_pio_out(vcpu, size, port); + } else if (string && in && rep) { + int addr_size = (io_info & SVM_IOIO_ASIZE_MASK) >> + SVM_IOIO_ASIZE_SHIFT; + int r = kvm_fast_string_pio_in(vcpu, size, port, + ffs(addr_size) - 1); + if (r != EMULATE_FAIL) + return r == EMULATE_DONE; + } + + return emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int nmi_interception(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e10ec0e..bae2c11 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -639,6 +639,7 @@ static unsigned long *vmx_msr_bitmap_longmode; static bool cpu_has_load_ia32_efer; static bool cpu_has_load_perf_global_ctrl; +static bool cpu_has_ins_outs_inst_info; static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); static DEFINE_SPINLOCK(vmx_vpid_lock); @@ -2522,6 +2523,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) if (((vmx_msr_high >> 18) & 15) != 6) return -EIO; + cpu_has_ins_outs_inst_info = vmx_msr_high & (1u << 22); + vmcs_conf->size = vmx_msr_high & 0x1fff; vmcs_conf->order = get_order(vmcs_config.size); vmcs_conf->revision_id = vmx_msr_low; @@ -4393,23 +4396,31 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu) static int handle_io(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; - int size, in, string; + int size, in, string, rep; unsigned port; exit_qualification = 
vmcs_readl(EXIT_QUALIFICATION); - string = (exit_qualification & 16) != 0; in = (exit_qualification & 8) != 0; + string = (exit_qualification & 16) != 0; + rep = (exit_qualification & 32) != 0; ++vcpu->stat.io_exits; - if (string || in) - return emulate_instruction(vcpu, 0) == EMULATE_DONE; - port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; - skip_emulated_instruction(vcpu); - return kvm_fast_pio_out(vcpu, size, port); + if (!string && !in) { + skip_emulated_instruction(vcpu); + return kvm_fast_pio_out(vcpu, size, port); + } else if (string && in && rep && cpu_has_ins_outs_inst_info) { + u32 inst_info = vmcs_read32(VMX_INSTRUCTION_INFO); + int r = kvm_fast_string_pio_in(vcpu, size, port, + (inst_info >> 7) & 7); + if (r != EMULATE_FAIL) + return r == EMULATE_DONE; + } + + return emulate_instruction(vcpu, 0) == EMULATE_DONE; } static void diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a4bc431..6acea31 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3661,6 +3661,59 @@ out: } EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +static bool get_segment_descriptor(struct kvm_vcpu *vcpu, u16 *selector, + struct desc_struct *desc, u32 *base3, + int seg) +{ + struct kvm_segment var; + + kvm_get_segment(vcpu, &var, seg); + *selector = var.selector; + + if (var.unusable) + return false; + + if (var.g) + var.limit >>= 12; + set_desc_limit(desc, var.limit); + set_desc_base(desc, (unsigned long)var.base); +#ifdef CONFIG_X86_64 + if (base3) + *base3 = var.base >> 32; +#endif + desc->type = var.type; + desc->s = var.s; + desc->dpl = var.dpl; + desc->p = var.present; + desc->avl = var.avl; + desc->l = var.l; + desc->d = var.db; + desc->g = var.g; + + return true; +} + +static int kvm_linearize_address(struct kvm_vcpu *vcpu, enum x86emul_mode mode, + ulong ea, unsigned seg, unsigned size, bool write, bool fetch, + u8 ad_bytes, ulong *linear) +{ + struct x86_linearize_params param = { + .mode = mode, + .ea = ea, + .size = size, + .seg = seg, 
+ .write = write, + .fetch = fetch, + .ad_bytes = ad_bytes, + .cpl = kvm_x86_ops->get_cpl(vcpu) + }; + + param.usable = get_segment_descriptor(vcpu, &param.sel, &param.desc, + NULL, seg); + + return x86_linearize(&param, linear); +} + static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, gpa_t *gpa, struct x86_exception *exception, bool write) @@ -4197,32 +4250,9 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, struct desc_struct *desc, u32 *base3, int seg) { - struct kvm_segment var; - - kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); - *selector = var.selector; - if (var.unusable) - return false; - - if (var.g) - var.limit >>= 12; - set_desc_limit(desc, var.limit); - set_desc_base(desc, (unsigned long)var.base); -#ifdef CONFIG_X86_64 - if (base3) - *base3 = var.base >> 32; -#endif - desc->type = var.type; - desc->s = var.s; - desc->dpl = var.dpl; - desc->p = var.present; - desc->avl = var.avl; - desc->l = var.l; - desc->d = var.db; - desc->g = var.g; - - return true; + return get_segment_descriptor(emul_to_vcpu(ctxt), selector, desc, base3, + seg); } static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, @@ -4408,10 +4438,22 @@ static void init_decode_cache(struct x86_emulate_ctxt *ctxt, ctxt->mem_read.end = 0; } +static enum x86emul_mode get_emulation_mode(struct kvm_vcpu *vcpu) +{ + int cs_db, cs_l; + + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + + return (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : + (kvm_get_rflags(vcpu) & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : + cs_l ? X86EMUL_MODE_PROT64 : + cs_db ? 
X86EMUL_MODE_PROT32 : + X86EMUL_MODE_PROT16; +} + static void init_emulate_ctxt(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; - int cs_db, cs_l; /* * TODO: fix emulate.c to use guest_read/write_register @@ -4421,15 +4463,10 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) */ cache_all_regs(vcpu); - kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ctxt->eflags = kvm_get_rflags(vcpu); ctxt->eip = kvm_rip_read(vcpu); - ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : - (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : - cs_l ? X86EMUL_MODE_PROT64 : - cs_db ? X86EMUL_MODE_PROT32 : - X86EMUL_MODE_PROT16; + ctxt->mode = get_emulation_mode(vcpu); ctxt->guest_mode = is_guest_mode(vcpu); init_decode_cache(ctxt, vcpu->arch.regs); @@ -4665,6 +4702,118 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) } EXPORT_SYMBOL_GPL(kvm_fast_pio_out); +static int __kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port, unsigned long addr, + int count) +{ + struct page *page; + gpa_t gpa; + char *kaddr; + int ret; + + gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); + + if (gpa == UNMAPPED_GVA || + (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) + return EMULATE_FAIL; + + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + if (is_error_page(page)) { + kvm_release_page_clean(page); + return EMULATE_FAIL; + } + + kaddr = kmap_atomic(page); + kaddr += offset_in_page(gpa); + + ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port, + kaddr, count); + + kunmap_atomic(kaddr); + if (ret) { + u8 ad_bytes = vcpu->arch.fast_string_pio_ctxt.ad_bytes; + unsigned long reg; + + reg = kvm_register_read(vcpu, VCPU_REGS_RCX); + kvm_register_address_increment(ad_bytes, &reg, -count); + kvm_register_write(vcpu, VCPU_REGS_RCX, reg); + + reg = kvm_register_read(vcpu, VCPU_REGS_RDI); + kvm_register_address_increment(ad_bytes, &reg, count * size); + kvm_register_write(vcpu, VCPU_REGS_RDI, reg); + + 
kvm_release_page_dirty(page); + return EMULATE_DONE; + } + kvm_release_page_clean(page); + return EMULATE_DO_MMIO; +} + +static int complete_fast_string_pio(struct kvm_vcpu *vcpu) +{ + unsigned long linear_addr = vcpu->arch.fast_string_pio_ctxt.linear_addr; + int r; + + BUG_ON(!vcpu->arch.pio.count); + + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + r = __kvm_fast_string_pio_in(vcpu, vcpu->arch.pio.size, + vcpu->arch.pio.port, linear_addr, vcpu->arch.pio.count); + BUG_ON(r == EMULATE_DO_MMIO); + if (r == EMULATE_FAIL) /* mem slot gone while we were not looking */ + vcpu->arch.pio.count = 0; /* drop the pio data */ + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + return 1; +} + +int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port, u8 ad_bytes_idx) +{ + unsigned long rdi = kvm_register_read(vcpu, VCPU_REGS_RDI); + unsigned long linear_addr = rdi + get_segment_base(vcpu, VCPU_SREG_ES); + unsigned long rcx = kvm_register_read(vcpu, VCPU_REGS_RCX), count; + u8 ad_bytes; + int r; + + if (rcx == 0) { + kvm_x86_ops->skip_emulated_instruction(vcpu); + return EMULATE_DONE; + } + if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF) + return EMULATE_FAIL; + if (ad_bytes_idx > 2) + return EMULATE_FAIL; + + ad_bytes = (u8[]){2, 4, 8}[ad_bytes_idx]; + + rdi = kvm_address_mask(ad_bytes, rdi); + + count = (PAGE_SIZE - offset_in_page(rdi))/size; + + if (count == 0) /* 'in' crosses page boundary */ + return EMULATE_FAIL; + + count = min(count, kvm_address_mask(ad_bytes, rcx)); + + r = kvm_linearize_address(vcpu, get_emulation_mode(vcpu), + rdi, VCPU_SREG_ES, count*size, true, false, ad_bytes, + &linear_addr); + + if (r >= 0) + return EMULATE_FAIL; + + r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count); + + if (r != EMULATE_DO_MMIO) + return r; + + vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr; + vcpu->arch.fast_string_pio_ctxt.ad_bytes = ad_bytes; + vcpu->arch.complete_userspace_io = complete_fast_string_pio; + return 
EMULATE_DO_MMIO; +} +EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in); + static void tsc_bad(void *info) { __this_cpu_write(cpu_tsc_khz, 0); -- 1.7.10 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html