Vineeth Pillai <viremana@xxxxxxxxxxxxxxxxxxx> writes: > Currently the remote TLB flush logic is specific to VMX. > Move it to a common place so that SVM can use it as well. > > Signed-off-by: Vineeth Pillai <viremana@xxxxxxxxxxxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 14 +++++ > arch/x86/kvm/hyperv.c | 87 +++++++++++++++++++++++++++++ > arch/x86/kvm/hyperv.h | 20 +++++++ > arch/x86/kvm/vmx/vmx.c | 97 +++------------------------------ > arch/x86/kvm/vmx/vmx.h | 10 ---- > arch/x86/kvm/x86.c | 9 ++- > 6 files changed, 136 insertions(+), 101 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 877a4025d8da..ed84c15d18bc 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -838,6 +838,15 @@ struct kvm_vcpu_arch { > > /* Protected Guests */ > bool guest_state_protected; > + > +#if IS_ENABLED(CONFIG_HYPERV) > + /* > + * Two Dimensional paging CR3 > + * EPTP for Intel > + * nCR3 for AMD > + */ > + u64 tdp_pointer; > +#endif > }; > > struct kvm_lpage_info { > @@ -1079,6 +1088,11 @@ struct kvm_arch { > */ > spinlock_t tdp_mmu_pages_lock; > #endif /* CONFIG_X86_64 */ > + > +#if IS_ENABLED(CONFIG_HYPERV) > + int tdp_pointers_match; > + spinlock_t tdp_pointer_lock; > +#endif > }; > > struct kvm_vm_stat { > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > index 58fa8c029867..614b4448a028 100644 > --- a/arch/x86/kvm/hyperv.c > +++ b/arch/x86/kvm/hyperv.c I still think that using arch/x86/kvm/hyperv.[ch] for KVM-on-Hyper-V is misleading. Currently, these are dedicated to emulating Hyper-V interface to KVM guests and this is orthogonal to nesting KVM on Hyper-V. As a solution, I'd suggest you either: - Put the stuff in x86.c - Create a dedicated set of files, e.g. 'kvmonhyperv.[ch]' (I also thought about 'hyperv_host.[ch]' but then I realized it's equally misleading as one can read this as 'KVM is acting as Hyper-V host'). Personally, I'd vote for the later. Besides eliminating confusion, the benefit of having dedicated files is that we can avoid compiling them completely when !IS_ENABLED(CONFIG_HYPERV) (#ifdefs in C are ugly). > @@ -32,6 +32,7 @@ > #include <linux/eventfd.h> > > #include <asm/apicdef.h> > +#include <asm/mshyperv.h> > #include <trace/events/kvm.h> > > #include "trace.h" > @@ -2180,3 +2181,89 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, > > return 0; > } > + > +#if IS_ENABLED(CONFIG_HYPERV) > +/* check_tdp_pointer() should be under protection of tdp_pointer_lock. */ > +static void check_tdp_pointer_match(struct kvm *kvm) > +{ > + u64 tdp_pointer = INVALID_PAGE; > + bool valid_tdp = false; > + struct kvm_vcpu *vcpu; > + int i; > + > + kvm_for_each_vcpu(i, vcpu, kvm) { > + if (!valid_tdp) { > + tdp_pointer = vcpu->arch.tdp_pointer; > + valid_tdp = true; > + continue; > + } > + > + if (tdp_pointer != vcpu->arch.tdp_pointer) { > + kvm->arch.tdp_pointers_match = TDP_POINTERS_MISMATCH; > + return; > + } > + } > + > + kvm->arch.tdp_pointers_match = TDP_POINTERS_MATCH; > +} > + > +static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush, > + void *data) > +{ > + struct kvm_tlb_range *range = data; > + > + return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn, > + range->pages); > +} > + > +static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm, > + struct kvm_vcpu *vcpu, struct kvm_tlb_range *range) > +{ > + u64 tdp_pointer = vcpu->arch.tdp_pointer; > + > + /* > + * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address > + * of the base of EPT PML4 table, strip off EPT configuration > + * information. > + */ > + if (range) > + return hyperv_flush_guest_mapping_range(tdp_pointer & PAGE_MASK, > + kvm_fill_hv_flush_list_func, (void *)range); > + else > + return hyperv_flush_guest_mapping(tdp_pointer & PAGE_MASK); > +} > + > +int kvm_hv_remote_flush_tlb_with_range(struct kvm *kvm, > + struct kvm_tlb_range *range) > +{ > + struct kvm_vcpu *vcpu; > + int ret = 0, i; > + > + spin_lock(&kvm->arch.tdp_pointer_lock); > + > + if (kvm->arch.tdp_pointers_match == TDP_POINTERS_CHECK) > + check_tdp_pointer_match(kvm); > + > + if (kvm->arch.tdp_pointers_match != TDP_POINTERS_MATCH) { > + kvm_for_each_vcpu(i, vcpu, kvm) { > + /* If tdp_pointer is invalid pointer, bypass flush request. */ > + if (VALID_PAGE(vcpu->arch.tdp_pointer)) > + ret |= __hv_remote_flush_tlb_with_range( > + kvm, vcpu, range); > + } > + } else { > + ret = __hv_remote_flush_tlb_with_range(kvm, > + kvm_get_vcpu(kvm, 0), range); > + } > + > + spin_unlock(&kvm->arch.tdp_pointer_lock); > + return ret; > +} > +EXPORT_SYMBOL_GPL(kvm_hv_remote_flush_tlb_with_range); > + > +int kvm_hv_remote_flush_tlb(struct kvm *kvm) > +{ > + return kvm_hv_remote_flush_tlb_with_range(kvm, NULL); > +} > +EXPORT_SYMBOL_GPL(kvm_hv_remote_flush_tlb); > +#endif > diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h > index e951af1fcb2c..b27c6f47f58d 100644 > --- a/arch/x86/kvm/hyperv.h > +++ b/arch/x86/kvm/hyperv.h > @@ -50,6 +50,12 @@ > /* Hyper-V HV_X64_MSR_SYNDBG_OPTIONS bits */ > #define HV_X64_SYNDBG_OPTION_USE_HCALLS BIT(2) > > +enum tdp_pointers_status { > + TDP_POINTERS_CHECK = 0, > + TDP_POINTERS_MATCH = 1, > + TDP_POINTERS_MISMATCH = 2 > +}; > + > static inline struct kvm_hv *to_kvm_hv(struct kvm *kvm) > { > return &kvm->arch.hyperv; > @@ -141,4 +147,18 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); > int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, > struct kvm_cpuid_entry2 __user *entries); > > +#if IS_ENABLED(CONFIG_HYPERV) > +static inline void kvm_update_arch_tdp_pointer(struct kvm *kvm, > + struct kvm_vcpu *vcpu, u64 tdp_pointer) > +{ > + spin_lock(&kvm->arch.tdp_pointer_lock); > + vcpu->arch.tdp_pointer = tdp_pointer; > + kvm->arch.tdp_pointers_match = TDP_POINTERS_CHECK; > + spin_unlock(&kvm->arch.tdp_pointer_lock); > +} > + > +int kvm_hv_remote_flush_tlb(struct kvm *kvm); > +int kvm_hv_remote_flush_tlb_with_range(struct kvm *kvm, > + struct kvm_tlb_range *range); > +#endif > #endif > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index 50810d471462..67f607319eb7 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -62,6 +62,7 @@ > #include "vmcs12.h" > #include "vmx.h" > #include "x86.h" > +#include "hyperv.h" > > MODULE_AUTHOR("Qumranet"); > MODULE_LICENSE("GPL"); > @@ -472,83 +473,6 @@ static const u32 vmx_uret_msrs_list[] = { > static bool __read_mostly enlightened_vmcs = true; > module_param(enlightened_vmcs, bool, 0444); > > -/* check_ept_pointer() should be under protection of ept_pointer_lock. */ > -static void check_ept_pointer_match(struct kvm *kvm) > -{ > - struct kvm_vcpu *vcpu; > - u64 tmp_eptp = INVALID_PAGE; > - int i; > - > - kvm_for_each_vcpu(i, vcpu, kvm) { > - if (!VALID_PAGE(tmp_eptp)) { > - tmp_eptp = to_vmx(vcpu)->ept_pointer; > - } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) { > - to_kvm_vmx(kvm)->ept_pointers_match > - = EPT_POINTERS_MISMATCH; > - return; > - } > - } > - > - to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; > -} > - > -static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush, > - void *data) > -{ > - struct kvm_tlb_range *range = data; > - > - return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn, > - range->pages); > -} > - > -static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm, > - struct kvm_vcpu *vcpu, struct kvm_tlb_range *range) > -{ > - u64 ept_pointer = to_vmx(vcpu)->ept_pointer; > - > - /* > - * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address > - * of the base of EPT PML4 table, strip off EPT configuration > - * information. > - */ > - if (range) > - return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK, > - kvm_fill_hv_flush_list_func, (void *)range); > - else > - return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK); > -} > - > -static int hv_remote_flush_tlb_with_range(struct kvm *kvm, > - struct kvm_tlb_range *range) > -{ > - struct kvm_vcpu *vcpu; > - int ret = 0, i; > - > - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); > - > - if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) > - check_ept_pointer_match(kvm); > - > - if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { > - kvm_for_each_vcpu(i, vcpu, kvm) { > - /* If ept_pointer is invalid pointer, bypass flush request. */ > - if (VALID_PAGE(to_vmx(vcpu)->ept_pointer)) > - ret |= __hv_remote_flush_tlb_with_range( > - kvm, vcpu, range); > - } > - } else { > - ret = __hv_remote_flush_tlb_with_range(kvm, > - kvm_get_vcpu(kvm, 0), range); > - } > - > - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); > - return ret; > -} > -static int hv_remote_flush_tlb(struct kvm *kvm) > -{ > - return hv_remote_flush_tlb_with_range(kvm, NULL); > -} > - > static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) > { > struct hv_enlightened_vmcs *evmcs; > @@ -3115,13 +3039,10 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd, > eptp = construct_eptp(vcpu, pgd, pgd_level); > vmcs_write64(EPT_POINTER, eptp); > > - if (kvm_x86_ops.tlb_remote_flush) { > - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); > - to_vmx(vcpu)->ept_pointer = eptp; > - to_kvm_vmx(kvm)->ept_pointers_match > - = EPT_POINTERS_CHECK; > - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); > - } > +#if IS_ENABLED(CONFIG_HYPERV) > + if (kvm_x86_ops.tlb_remote_flush) > + kvm_update_arch_tdp_pointer(kvm, vcpu, eptp); > +#endif > > if (!enable_unrestricted_guest && !is_paging(vcpu)) > guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; > @@ -6989,8 +6910,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) > vmx->pi_desc.nv = POSTED_INTR_VECTOR; > vmx->pi_desc.sn = 1; > > - vmx->ept_pointer = INVALID_PAGE; > - > return 0; > > free_vmcs: > @@ -7007,8 +6926,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) > > static int vmx_vm_init(struct kvm *kvm) > { > - spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock); > - > if (!ple_gap) > kvm->arch.pause_in_guest = true; > > @@ -7818,9 +7735,9 @@ static __init int hardware_setup(void) > #if IS_ENABLED(CONFIG_HYPERV) > if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH > && enable_ept) { > - vmx_x86_ops.tlb_remote_flush = hv_remote_flush_tlb; > + vmx_x86_ops.tlb_remote_flush = kvm_hv_remote_flush_tlb; > vmx_x86_ops.tlb_remote_flush_with_range = > - hv_remote_flush_tlb_with_range; > + kvm_hv_remote_flush_tlb_with_range; > } > #endif > > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h > index 89da5e1251f1..d2e2ab46f5bb 100644 > --- a/arch/x86/kvm/vmx/vmx.h > +++ b/arch/x86/kvm/vmx/vmx.h > @@ -325,7 +325,6 @@ struct vcpu_vmx { > */ > u64 msr_ia32_feature_control; > u64 msr_ia32_feature_control_valid_bits; > - u64 ept_pointer; > > struct pt_desc pt_desc; > struct lbr_desc lbr_desc; > @@ -338,21 +337,12 @@ struct vcpu_vmx { > } shadow_msr_intercept; > }; > > -enum ept_pointers_status { > - EPT_POINTERS_CHECK = 0, > - EPT_POINTERS_MATCH = 1, > - EPT_POINTERS_MISMATCH = 2 > -}; > - > struct kvm_vmx { > struct kvm kvm; > > unsigned int tss_addr; > bool ept_identity_pagetable_done; > gpa_t ept_identity_map_addr; > - > - enum ept_pointers_status ept_pointers_match; > - spinlock_t ept_pointer_lock; > }; > > bool nested_vmx_allowed(struct kvm_vcpu *vcpu); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 2a20ce60152e..f566e78b59b9 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -10115,6 +10115,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) > vcpu->arch.pending_external_vector = -1; > vcpu->arch.preempted_in_kernel = false; > > +#if IS_ENABLED(CONFIG_HYPERV) > + vcpu->arch.tdp_pointer = INVALID_PAGE; > +#endif > + > r = static_call(kvm_x86_vcpu_create)(vcpu); > if (r) > goto free_guest_fpu; > @@ -10470,7 +10474,6 @@ void kvm_arch_free_vm(struct kvm *kvm) > vfree(kvm); > } > > - Stray change? > int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > { > if (type) > @@ -10498,6 +10501,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > > kvm->arch.guest_can_read_msr_platform_info = true; > > +#if IS_ENABLED(CONFIG_HYPERV) > + spin_lock_init(&kvm->arch.tdp_pointer_lock); > +#endif > + > INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); > INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); -- Vitaly