Re: [PATCH 6/6] KVM: x86: Emulator support for TF

Gleb Natapov <gleb@xxxxxxxxxx> · Tue, 23 Feb 2010 12:26:20 +0200



On Tue, Feb 23, 2010 at 11:10:57AM +0100, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > On Mon, Feb 22, 2010 at 06:51:23PM +0100, Jan Kiszka wrote:
> >> Support both guest- as well as host-owned EFLAGS.TF while emulating
> >> instructions. For guest-owned TF, we simply inject DB and update DR6.BS
> >> after completing an instruction that has TF set on entry. To support
> >> guest single-stepping under host control, we store the pending step
> >> along with its CS and RIP and trigger a corresponding user space exit
> >> once guest execution is about to resume. This check is is also required
> >> in the VMX emulation loop during invalid guest states.
> >>
> > Emulator currently is a total mess. It is not a good time to add more mess
> > there right now IMO.
> 
> Then let's clean up what you consider "mess" in this feature. Unless
> there are plans to clean up the emulator for the next or next-but-one
> kernel release, I do not want to wait for this.
> 
There are plans to cleanup the emulator.

> > 
> >> Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
> >> ---
> >>  arch/x86/include/asm/kvm_host.h |    5 +++
> >>  arch/x86/kvm/vmx.c              |    6 +++
> >>  arch/x86/kvm/x86.c              |   65 ++++++++++++++++++++++++++++++++++-----
> >>  3 files changed, 68 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >> index d46e791..d69d8aa 100644
> >> --- a/arch/x86/include/asm/kvm_host.h
> >> +++ b/arch/x86/include/asm/kvm_host.h
> >> @@ -362,8 +362,11 @@ struct kvm_vcpu_arch {
> >>  	u64 *mce_banks;
> >>  
> >>  	/* used for guest single stepping over the given code position */
> >> +	bool singlestep_pending;
> >>  	u16 singlestep_cs;
> >> +	u16 singlestep_pending_cs;
> >>  	unsigned long singlestep_rip;
> >> +	unsigned long singlestep_pending_rip;
> > If we are going to have many of those rip/cs pairs may be it is better
> > to add structure linear_ip and have functions is_same_ip().
> 
> Agreed.
> 
> >  
> >>  	/* fields used by HYPER-V emulation */
> >>  	u64 hv_vapic;
> >>  };
> >> @@ -820,4 +823,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
> >>  void kvm_define_shared_msr(unsigned index, u32 msr);
> >>  void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
> >>  
> >> +int kvm_check_guest_singlestep(struct kvm_vcpu *vcpu);
> >> +
> >>  #endif /* _ASM_X86_KVM_HOST_H */
> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >> index d772476..317828f 100644
> >> --- a/arch/x86/kvm/vmx.c
> >> +++ b/arch/x86/kvm/vmx.c
> >> @@ -3489,6 +3489,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
> >>  			goto out;
> >>  		if (need_resched())
> >>  			schedule();
> >> +
> >> +		if (unlikely(vcpu->arch.singlestep_pending)) {
> >> +			ret = kvm_check_guest_singlestep(vcpu);
> >> +			if (ret == 0)
> >> +				goto out;
> >> +		}
> >>  	}
> >>  
> >>  	vmx->emulation_required = 0;
> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >> index 19e8b28..6ebebb9 100644
> >> --- a/arch/x86/kvm/x86.c
> >> +++ b/arch/x86/kvm/x86.c
> >> @@ -3441,6 +3441,27 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)
> >>  	vcpu->arch.regs_dirty = ~0;
> >>  }
> >>  
> >> +static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
> >> +{
> >> +	struct kvm_segment kvm_seg;
> >> +
> >> +	kvm_get_segment(vcpu, &kvm_seg, seg);
> >> +	return kvm_seg.selector;
> >> +}
> >> +
> >> +static void queue_singlestep(struct kvm_vcpu *vcpu)
> >> +{
> >> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
> >> +		vcpu->arch.singlestep_pending = true;
> >> +		vcpu->arch.singlestep_pending_cs =
> >> +			get_segment_selector(vcpu, VCPU_SREG_CS);
> >> +		vcpu->arch.singlestep_pending_rip = kvm_rip_read(vcpu);
> > Why should we remember rip where TF happened? We should exit
> > immediately to userspace anyway, no?
> 
> I think MMIO exits takes precedence, so this is intended to exit after
> they completed, ie. after the instruction is fully finished.
> 
> > 
> >> +	} else {
> >> +		vcpu->arch.dr6 |= DR6_BS;
> >> +		kvm_queue_exception(vcpu, DB_VECTOR);
> > What if instruction emulation generated fault?
> 
> Fault-like exceptions will trigger before that, and the instruction
> won't complete. Do we have any trap-like exceptions to worry about?
> 
They will not trigger before that. They will be queued for the next
entry and queuing another one will either overwrite the previous one,
or will queue double fault (depending on what what the first exception).

> > 
> >> +	}
> >> +}
> >> +
> >>  int emulate_instruction(struct kvm_vcpu *vcpu,
> >>  			unsigned long cr2,
> >>  			u16 error_code,
> >> @@ -3449,6 +3470,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
> >>  	int r, shadow_mask;
> >>  	struct decode_cache *c;
> >>  	struct kvm_run *run = vcpu->run;
> >> +	bool singlestep;
> >>  
> >>  	kvm_clear_exception_queue(vcpu);
> >>  	vcpu->arch.mmio_fault_cr2 = cr2;
> >> @@ -3515,8 +3537,12 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
> >>  		}
> >>  	}
> >>  
> >> +	singlestep = vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_TF;
> >> +
> >>  	if (emulation_type & EMULTYPE_SKIP) {
> >>  		kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
> >> +		if (singlestep)
> >> +			queue_singlestep(vcpu);
> > Instruction that wasn't emulated shouldn't generate faults.
> > 
> 
> Skipping here doesn't mean it's not emulated. A valid question might be
> if we should catch it here or in skip_emulated_instruction.
In skip_emulated_instruction() or even above that. Only at the point we
are sure we actually emulate instruction.

> 
> >>  		return EMULATE_DONE;
> >>  	}
> >>  
> >> @@ -3549,6 +3575,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
> >>  
> >>  	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
> >>  
> >> +	if (singlestep)
> >> +		queue_singlestep(vcpu);
> >> +
> > if vcpu->mmio_is_write == true we can still exit with DO_MMIO, so
> > instruction is not yes completely executed. This queue_singlestep
> > mechanism looks bogus anyway. emulate_instruction() caller should
> > initiate exit to userspace space if required.
> 
> That's while it is _queued_, not immediately delivered: MMIO exits will
> continue to take precedence.
That is bogus. We should queue TF only after instruction is completely
emulated, not during emulation. Instruction may generate dozen MMIO
exits and eventually be aborted by exception, so DB shouldn't be generated
at all in this case.

> 
> > 
> >>  	if (vcpu->mmio_is_write) {
> >>  		vcpu->mmio_needed = 0;
> >>  		return EMULATE_DO_MMIO;
> >> @@ -4450,6 +4479,26 @@ out:
> >>  	return r;
> >>  }
> >>  
> >> +int kvm_check_guest_singlestep(struct kvm_vcpu *vcpu)
> >> +{
> >> +	unsigned long rip = kvm_rip_read(vcpu);
> >> +
> >> +	vcpu->arch.singlestep_pending = false;
> >> +
> >> +	if (vcpu->arch.singlestep_pending_cs !=
> >> +		get_segment_selector(vcpu, VCPU_SREG_CS) ||
> >> +	    vcpu->arch.singlestep_pending_rip != rip)
> >> +		return 1;
> >> +
> > Again how this check can be false?
> 
> E.g. someone fiddled with the VCPU state, resetting the guest.
How is this someone? It should reset this state too then.

> 
> > 
> >> +	vcpu->run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
> >> +	vcpu->run->debug.arch.dr7 = 0;
> >> +	vcpu->run->exit_reason = KVM_EXIT_DEBUG;
> >> +	vcpu->run->debug.arch.pc = get_segment_base(vcpu, VCPU_SREG_CS) + rip;
> >> +	vcpu->run->debug.arch.exception = DB_VECTOR;
> >> +
> >> +	return 0;
> >> +}
> >> +EXPORT_SYMBOL_GPL(kvm_check_guest_singlestep);
> >>  
> >>  static int __vcpu_run(struct kvm_vcpu *vcpu)
> >>  {
> >> @@ -4471,6 +4520,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> >>  
> >>  	r = 1;
> >>  	while (r > 0) {
> >> +		if (unlikely(vcpu->arch.singlestep_pending)) {
> >> +			r = kvm_check_guest_singlestep(vcpu);
> >> +			if (r == 0)
> >> +				break;
> >> +		}
> >> +
> > Why not use existing mechanism to cause run loop to exit to userspace
> > i.e return 0 from vcpu_enter_guest(), instead of adding special cases here?
> > 
> 
> I wanted to exit in case of vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE
> as well, but thinking about this again it's actually more reasonable to
> exit once the VCPU unblocks again, e.g. once halt resumes.
> 
We will exit immediately after halt if return value is 0, why would you
want to exit on vcpu entry after that?
 
> >>  		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
> >>  			r = vcpu_enter_guest(vcpu);
> >>  		else {
> >> @@ -4828,14 +4883,6 @@ static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
> >>  	return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
> >>  }
> >>  
> >> -static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
> >> -{
> >> -	struct kvm_segment kvm_seg;
> >> -
> >> -	kvm_get_segment(vcpu, &kvm_seg, seg);
> >> -	return kvm_seg.selector;
> >> -}
> >> -
> >>  static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
> >>  {
> >>  	struct kvm_segment segvar = {
> >> @@ -5607,6 +5654,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
> >>  	vcpu->arch.dr6 = DR6_FIXED_1;
> >>  	vcpu->arch.dr7 = DR7_FIXED_1;
> >>  
> >> +	vcpu->arch.singlestep_pending = false;
> >> +
> >>  	return kvm_x86_ops->vcpu_reset(vcpu);
> >>  }
> >>  
> >> -- 
> >> 1.6.0.2
> > 
> > --
> > 			Gleb.
> 
> Thanks,
> Jan
> 
> -- 
> Siemens AG, Corporate Technology, CT T DE IT 1
> Corporate Competence Center Embedded Linux

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html