On Tue, Jun 29, 2010 at 11:16:59AM +0800, Sheng Yang wrote: > Some guest device driver may leverage the "Non-Snoop" I/O, and explicitly > WBINVD or CLFLUSH to a RAM space. Since migration may occur before WBINVD or > CLFLUSH, we need to maintain data consistency either by: > 1: flushing cache (wbinvd) when the guest is scheduled out if there is no > wbinvd exit, or > 2: execute wbinvd on all dirty physical CPUs when guest wbinvd exits. > > Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@xxxxxxxxx> > Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 6 +++++ > arch/x86/kvm/emulate.c | 5 +++- > arch/x86/kvm/svm.c | 7 ++++++ > arch/x86/kvm/vmx.c | 10 ++++++++- > arch/x86/kvm/x86.c | 41 +++++++++++++++++++++++++++++++++++++++ > 5 files changed, 67 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index a57cdea..2bda624 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -15,6 +15,7 @@ > #include <linux/mm.h> > #include <linux/mmu_notifier.h> > #include <linux/tracepoint.h> > +#include <linux/cpumask.h> > > #include <linux/kvm.h> > #include <linux/kvm_para.h> > @@ -358,6 +359,8 @@ struct kvm_vcpu_arch { > > /* fields used by HYPER-V emulation */ > u64 hv_vapic; > + > + cpumask_var_t wbinvd_dirty_mask; > }; > > struct kvm_arch { > @@ -514,6 +517,8 @@ struct kvm_x86_ops { > > void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); > > + bool (*has_wbinvd_exit)(void); > + > const struct trace_print_flags *exit_reasons_str; > }; > > @@ -571,6 +576,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); > int kvm_emulate_halt(struct kvm_vcpu *vcpu); > int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); > int emulate_clts(struct kvm_vcpu *vcpu); > +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); > > void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); > int kvm_load_segment_descriptor(struct 
kvm_vcpu *vcpu, u16 selector, int seg); > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index abb8cec..e8bdddc 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -3138,8 +3138,11 @@ twobyte_insn: > emulate_clts(ctxt->vcpu); > c->dst.type = OP_NONE; > break; > - case 0x08: /* invd */ > case 0x09: /* wbinvd */ > + kvm_emulate_wbinvd(ctxt->vcpu); > + c->dst.type = OP_NONE; > + break; > + case 0x08: /* invd */ > case 0x0d: /* GrpP (prefetch) */ > case 0x18: /* Grp16 (prefetch/nop) */ > c->dst.type = OP_NONE; > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 587b99d..56c9b6b 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -3424,6 +3424,11 @@ static bool svm_rdtscp_supported(void) > return false; > } > > +static bool svm_has_wbinvd_exit(void) > +{ > + return true; > +} > + > static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) > { > struct vcpu_svm *svm = to_svm(vcpu); > @@ -3508,6 +3513,8 @@ static struct kvm_x86_ops svm_x86_ops = { > .rdtscp_supported = svm_rdtscp_supported, > > .set_supported_cpuid = svm_set_supported_cpuid, > + > + .has_wbinvd_exit = svm_has_wbinvd_exit, > }; > > static int __init svm_init(void) > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index e565689..806ab12 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -412,6 +412,12 @@ static inline bool cpu_has_virtual_nmis(void) > return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; > } > > +static inline bool cpu_has_vmx_wbinvd_exit(void) > +{ > + return vmcs_config.cpu_based_2nd_exec_ctrl & > + SECONDARY_EXEC_WBINVD_EXITING; > +} > + > static inline bool report_flexpriority(void) > { > return flexpriority_enabled; > @@ -3400,7 +3406,7 @@ static int handle_invlpg(struct kvm_vcpu *vcpu) > static int handle_wbinvd(struct kvm_vcpu *vcpu) > { > skip_emulated_instruction(vcpu); > - /* TODO: Add support for VT-d/pass-through device */ > + kvm_emulate_wbinvd(vcpu); > return 1; > } > > @@ -4350,6 
+4356,8 @@ static struct kvm_x86_ops vmx_x86_ops = { > .rdtscp_supported = vmx_rdtscp_supported, > > .set_supported_cpuid = vmx_set_supported_cpuid, > + > + .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, > }; > > static int __init vmx_init(void) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index d0b9252..9a400ae 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -1783,8 +1783,28 @@ out: > return r; > } > > +static void wbinvd_ipi(void *garbage) > +{ > + wbinvd(); > +} > + > +static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) > +{ > + return vcpu->kvm->arch.iommu_domain && > + !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); > +} > + > void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > { > + /* Address WBINVD may be executed by guest */ > + if (need_emulate_wbinvd(vcpu)) { > + if (kvm_x86_ops->has_wbinvd_exit()) > + cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); > + else if (vcpu->cpu != -1 && vcpu->cpu != cpu) > + smp_call_function_single(vcpu->cpu, > + wbinvd_ipi, NULL, 1); > + } > + > kvm_x86_ops->vcpu_load(vcpu, cpu); > if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { > unsigned long khz = cpufreq_quick_get(cpu); > @@ -3650,6 +3670,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) > return X86EMUL_CONTINUE; > } > > +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) > +{ > + if (!need_emulate_wbinvd(vcpu)) > + return X86EMUL_CONTINUE; > + > + if (kvm_x86_ops->has_wbinvd_exit()) { > + smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, > + wbinvd_ipi, NULL, 1); Use a work_on_cpu() loop instead of smp_call_function_many(), to avoid executing wbinvd with interrupts disabled. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html