Gleb Natapov wrote: > On Mon, Feb 22, 2010 at 06:51:23PM +0100, Jan Kiszka wrote: >> Support both guest- as well as host-owned EFLAGS.TF while emulating >> instructions. For guest-owned TF, we simply inject DB and update DR6.BS >> after completing an instruction that has TF set on entry. To support >> guest single-stepping under host control, we store the pending step >> along with its CS and RIP and trigger a corresponding user space exit >> once guest execution is about to resume. This check is is also required >> in the VMX emulation loop during invalid guest states. >> > Emulator currently is a total mess. It is not a good time to add more mess > there right now IMO. Then let's clean up what you consider "mess" in this feature. Unless there are plans to clean up the emulator for the next or next-but-one kernel release, I do not want to wait for this. > >> Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> >> --- >> arch/x86/include/asm/kvm_host.h | 5 +++ >> arch/x86/kvm/vmx.c | 6 +++ >> arch/x86/kvm/x86.c | 65 ++++++++++++++++++++++++++++++++++----- >> 3 files changed, 68 insertions(+), 8 deletions(-) >> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >> index d46e791..d69d8aa 100644 >> --- a/arch/x86/include/asm/kvm_host.h >> +++ b/arch/x86/include/asm/kvm_host.h >> @@ -362,8 +362,11 @@ struct kvm_vcpu_arch { >> u64 *mce_banks; >> >> /* used for guest single stepping over the given code position */ >> + bool singlestep_pending; >> u16 singlestep_cs; >> + u16 singlestep_pending_cs; >> unsigned long singlestep_rip; >> + unsigned long singlestep_pending_rip; > If we are going to have many of those rip/cs pairs may be it is better > to add structure linear_ip and have functions is_same_ip(). Agreed. > >> /* fields used by HYPER-V emulation */ >> u64 hv_vapic; >> }; >> @@ -820,4 +823,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); >> void kvm_define_shared_msr(unsigned index, u32 msr); >> void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); >> >> +int kvm_check_guest_singlestep(struct kvm_vcpu *vcpu); >> + >> #endif /* _ASM_X86_KVM_HOST_H */ >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index d772476..317828f 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -3489,6 +3489,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) >> goto out; >> if (need_resched()) >> schedule(); >> + >> + if (unlikely(vcpu->arch.singlestep_pending)) { >> + ret = kvm_check_guest_singlestep(vcpu); >> + if (ret == 0) >> + goto out; >> + } >> } >> >> vmx->emulation_required = 0; >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index 19e8b28..6ebebb9 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -3441,6 +3441,27 @@ static void cache_all_regs(struct kvm_vcpu *vcpu) >> vcpu->arch.regs_dirty = ~0; >> } >> >> +static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) >> +{ >> + struct kvm_segment kvm_seg; >> + >> + kvm_get_segment(vcpu, &kvm_seg, seg); >> + return kvm_seg.selector; >> +} >> + >> +static void queue_singlestep(struct kvm_vcpu *vcpu) >> +{ >> + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { >> + vcpu->arch.singlestep_pending = true; >> + vcpu->arch.singlestep_pending_cs = >> + get_segment_selector(vcpu, VCPU_SREG_CS); >> + vcpu->arch.singlestep_pending_rip = kvm_rip_read(vcpu); > Why should we remember rip where TF happened? We should exit > immediately to userspace anyway, no? I think MMIO exits takes precedence, so this is intended to exit after they completed, ie. after the instruction is fully finished. > >> + } else { >> + vcpu->arch.dr6 |= DR6_BS; >> + kvm_queue_exception(vcpu, DB_VECTOR); > What if instruction emulation generated fault? Fault-like exceptions will trigger before that, and the instruction won't complete. Do we have any trap-like exceptions to worry about? > >> + } >> +} >> + >> int emulate_instruction(struct kvm_vcpu *vcpu, >> unsigned long cr2, >> u16 error_code, >> @@ -3449,6 +3470,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, >> int r, shadow_mask; >> struct decode_cache *c; >> struct kvm_run *run = vcpu->run; >> + bool singlestep; >> >> kvm_clear_exception_queue(vcpu); >> vcpu->arch.mmio_fault_cr2 = cr2; >> @@ -3515,8 +3537,12 @@ int emulate_instruction(struct kvm_vcpu *vcpu, >> } >> } >> >> + singlestep = vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_TF; >> + >> if (emulation_type & EMULTYPE_SKIP) { >> kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip); >> + if (singlestep) >> + queue_singlestep(vcpu); > Instruction that wasn't emulated shouldn't generate faults. > Skipping here doesn't mean it's not emulated. A valid question might be if we should catch it here or in skip_emulated_instruction. >> return EMULATE_DONE; >> } >> >> @@ -3549,6 +3575,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu, >> >> kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); >> >> + if (singlestep) >> + queue_singlestep(vcpu); >> + > if vcpu->mmio_is_write == true we can still exit with DO_MMIO, so > instruction is not yes completely executed. This queue_singlestep > mechanism looks bogus anyway. emulate_instruction() caller should > initiate exit to userspace space if required. That's while it is _queued_, not immediately delivered: MMIO exits will continue to take precedence. > >> if (vcpu->mmio_is_write) { >> vcpu->mmio_needed = 0; >> return EMULATE_DO_MMIO; >> @@ -4450,6 +4479,26 @@ out: >> return r; >> } >> >> +int kvm_check_guest_singlestep(struct kvm_vcpu *vcpu) >> +{ >> + unsigned long rip = kvm_rip_read(vcpu); >> + >> + vcpu->arch.singlestep_pending = false; >> + >> + if (vcpu->arch.singlestep_pending_cs != >> + get_segment_selector(vcpu, VCPU_SREG_CS) || >> + vcpu->arch.singlestep_pending_rip != rip) >> + return 1; >> + > Again how this check can be false? E.g. someone fiddled with the VCPU state, resetting the guest. > >> + vcpu->run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; >> + vcpu->run->debug.arch.dr7 = 0; >> + vcpu->run->exit_reason = KVM_EXIT_DEBUG; >> + vcpu->run->debug.arch.pc = get_segment_base(vcpu, VCPU_SREG_CS) + rip; >> + vcpu->run->debug.arch.exception = DB_VECTOR; >> + >> + return 0; >> +} >> +EXPORT_SYMBOL_GPL(kvm_check_guest_singlestep); >> >> static int __vcpu_run(struct kvm_vcpu *vcpu) >> { >> @@ -4471,6 +4520,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) >> >> r = 1; >> while (r > 0) { >> + if (unlikely(vcpu->arch.singlestep_pending)) { >> + r = kvm_check_guest_singlestep(vcpu); >> + if (r == 0) >> + break; >> + } >> + > Why not use existing mechanism to cause run loop to exit to userspace > i.e return 0 from vcpu_enter_guest(), instead of adding special cases here? > I wanted to exit in case of vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE as well, but thinking about this again it's actually more reasonable to exit once the VCPU unblocks again, e.g. once halt resumes. >> if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) >> r = vcpu_enter_guest(vcpu); >> else { >> @@ -4828,14 +4883,6 @@ static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, >> return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); >> } >> >> -static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) >> -{ >> - struct kvm_segment kvm_seg; >> - >> - kvm_get_segment(vcpu, &kvm_seg, seg); >> - return kvm_seg.selector; >> -} >> - >> static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) >> { >> struct kvm_segment segvar = { >> @@ -5607,6 +5654,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) >> vcpu->arch.dr6 = DR6_FIXED_1; >> vcpu->arch.dr7 = DR7_FIXED_1; >> >> + vcpu->arch.singlestep_pending = false; >> + >> return kvm_x86_ops->vcpu_reset(vcpu); >> } >> >> -- >> 1.6.0.2 > > -- > Gleb. Thanks, Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html