Currently, when a string instruction is only partially complete, we go back to guest mode; the guest tries to re-execute the instruction and exits again, and only at this point does emulation continue. Avoid all of this by restarting the instruction without going back to guest mode, but return to guest mode every 1024 iterations to allow interrupt injection. A pending exception causes immediate guest entry too. Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx> --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 34 +++++++++++++++++++++++----------- arch/x86/kvm/x86.c | 19 ++++++++++++++++++- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 679245c..7fda16f 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -193,6 +193,7 @@ struct x86_emulate_ctxt { /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; + bool restart; /* restart string instruction after writeback */ /* decode cache */ struct decode_cache decode; }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 541f3c9..c4da60e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -927,8 +927,11 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) int mode = ctxt->mode; int def_op_bytes, def_ad_bytes, group; - /* Shadow copy of register state. Committed on successful emulation. */ + /* we cannot decode insn before we complete previous rep insn */ + WARN_ON(ctxt->restart); + + /* Shadow copy of register state. Committed on successful emulation. 
*/ memset(c, 0, sizeof(struct decode_cache)); c->eip = ctxt->eip; ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); @@ -2422,6 +2425,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) u64 msr_data; struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; + int saved_dst_type = c->dst.type; ctxt->interruptibility = 0; @@ -2450,8 +2454,11 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) } if (c->rep_prefix && (c->d & String)) { + ctxt->restart = true; /* All REP prefixes have the same first termination condition */ if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { + string_done: + ctxt->restart = false; kvm_rip_write(ctxt->vcpu, c->eip); goto done; } @@ -2463,17 +2470,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) * - if REPNE/REPNZ and ZF = 1 then done */ if ((c->b == 0xa6) || (c->b == 0xa7) || - (c->b == 0xae) || (c->b == 0xaf)) { + (c->b == 0xae) || (c->b == 0xaf)) { if ((c->rep_prefix == REPE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == 0)) { - kvm_rip_write(ctxt->vcpu, c->eip); - goto done; - } + ((ctxt->eflags & EFLG_ZF) == 0)) + goto string_done; if ((c->rep_prefix == REPNE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { - kvm_rip_write(ctxt->vcpu, c->eip); - goto done; - } + ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) + goto string_done; } c->eip = ctxt->eip; } @@ -2906,6 +2909,12 @@ writeback: if (rc != X86EMUL_CONTINUE) goto done; + /* + * restore dst type in case the decoding will be reused + * (happens for string instruction ) + */ + c->dst.type = saved_dst_type; + if ((c->d & SrcMask) == SrcSI) string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, &c->src); @@ -2913,8 +2922,11 @@ writeback: if ((c->d & DstMask) == DstDI) string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); - if (c->rep_prefix && (c->d & String)) + if (c->rep_prefix && (c->d & String)) { register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); + if 
(!(c->regs[VCPU_REGS_RCX] & 0x3ff)) + ctxt->restart = false; + } /* Commit shadow register state. */ memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b8237ac..cd0043a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3718,6 +3718,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; } +restart: r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; @@ -3740,7 +3741,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, if (r) { if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) - return EMULATE_DONE; + goto done; if (!vcpu->mmio_needed) { kvm_report_emulation_failure(vcpu, "mmio"); return EMULATE_FAIL; @@ -3755,6 +3756,13 @@ int emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DO_MMIO; } +done: + if (vcpu->arch.exception.pending) + vcpu->arch.emulate_ctxt.restart = false; + + if (vcpu->arch.emulate_ctxt.restart) + goto restart; + return EMULATE_DONE; } EXPORT_SYMBOL_GPL(emulate_instruction); @@ -4523,6 +4531,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } } + if (vcpu->arch.emulate_ctxt.restart) { + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (r == EMULATE_DO_MMIO) { + r = 0; + goto out; + } + } if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) kvm_register_write(vcpu, VCPU_REGS_RAX, kvm_run->hypercall.ret); -- 1.6.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html