To allow flushing individual GVAs instead of always flushing the whole VPID a per-vCPU structure to pass the requests is needed. Introduce a simple ring write-locked structure to hold two types of entries: individual GVA (GFN + up to 4095 following GFNs in the lower 12 bits) and 'flush all'. The queuing rule is: if there's not enough space on the ring to put the request and leave at least 1 entry for 'flush all' - put 'flush all' entry. The size of the ring is arbitrary set to '16'. Note, kvm_hv_flush_tlb() only queues 'flush all' entries for now so there's very small functional change but the infrastructure is prepared to handle individual GVA flush requests. Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 16 +++++++ arch/x86/kvm/hyperv.c | 83 +++++++++++++++++++++++++++++++++ arch/x86/kvm/hyperv.h | 13 ++++++ arch/x86/kvm/x86.c | 5 +- arch/x86/kvm/x86.h | 1 + 5 files changed, 116 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1de3ad9308d8..b4dd2ff61658 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -578,6 +578,20 @@ struct kvm_vcpu_hv_synic { bool dont_zero_synic_pages; }; +#define KVM_HV_TLB_FLUSH_RING_SIZE (16) + +struct kvm_vcpu_hv_tlb_flush_entry { + u64 addr; + u64 flush_all:1; + u64 pad:63; +}; + +struct kvm_vcpu_hv_tlb_flush_ring { + int read_idx, write_idx; + spinlock_t write_lock; + struct kvm_vcpu_hv_tlb_flush_entry entries[KVM_HV_TLB_FLUSH_RING_SIZE]; +}; + /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { struct kvm_vcpu *vcpu; @@ -597,6 +611,8 @@ struct kvm_vcpu_hv { u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */ u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ } cpuid_cache; + + struct kvm_vcpu_hv_tlb_flush_ring tlb_flush_ring; }; /* Xen HVM per vcpu emulation context */ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index b402ad059eb9..fb716cf919ed 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -29,6 +29,7 @@ #include <linux/kvm_host.h> #include <linux/highmem.h> #include <linux/sched/cputime.h> +#include <linux/spinlock.h> #include <linux/eventfd.h> #include <asm/apicdef.h> @@ -954,6 +955,8 @@ static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) hv_vcpu->vp_index = vcpu->vcpu_idx; + spin_lock_init(&hv_vcpu->tlb_flush_ring.write_lock); + return 0; } @@ -1789,6 +1792,74 @@ static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc, var_cnt * sizeof(*sparse_banks)); } +static inline int hv_tlb_flush_ring_free(struct kvm_vcpu_hv *hv_vcpu, + int read_idx, int write_idx) +{ + if (write_idx >= read_idx) + return KVM_HV_TLB_FLUSH_RING_SIZE - (write_idx - read_idx) - 1; + + return read_idx - write_idx - 1; +} + +static void hv_tlb_flush_ring_enqueue(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv_tlb_flush_ring *tlb_flush_ring; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + int ring_free, write_idx, read_idx; + unsigned long flags; + + if (!hv_vcpu) + return; + + tlb_flush_ring = &hv_vcpu->tlb_flush_ring; + + spin_lock_irqsave(&tlb_flush_ring->write_lock, flags); + + /* + * 'read_idx' is updated by the vCPU which does the flush, this + * happens without 'tlb_flush_ring->write_lock' being held; make + * sure we read it once. + */ + read_idx = READ_ONCE(tlb_flush_ring->read_idx); + /* + * 'write_idx' is only updated here, under 'tlb_flush_ring->write_lock'. + * allow the compiler to re-read it, it can't change. + */ + write_idx = tlb_flush_ring->write_idx; + + ring_free = hv_tlb_flush_ring_free(hv_vcpu, read_idx, write_idx); + /* Full ring always contains 'flush all' entry */ + if (!ring_free) + goto out_unlock; + + tlb_flush_ring->entries[write_idx].addr = 0; + tlb_flush_ring->entries[write_idx].flush_all = 1; + /* + * Advance write index only after filling in the entry to + * synchronize with lockless reader. + */ + smp_wmb(); + tlb_flush_ring->write_idx = (write_idx + 1) % KVM_HV_TLB_FLUSH_RING_SIZE; + +out_unlock: + spin_unlock_irqrestore(&tlb_flush_ring->write_lock, flags); +} + +void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv_tlb_flush_ring *tlb_flush_ring; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + kvm_vcpu_flush_tlb_guest(vcpu); + + if (!hv_vcpu) + return; + + tlb_flush_ring = &hv_vcpu->tlb_flush_ring; + + tlb_flush_ring->read_idx = tlb_flush_ring->write_idx; +} + static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm *kvm = vcpu->kvm; @@ -1797,6 +1868,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); u64 valid_bank_mask; u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; + struct kvm_vcpu *v; + unsigned long i; bool all_cpus; /* @@ -1876,10 +1949,20 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) * analyze it here, flush TLB regardless of the specified address space. */ if (all_cpus) { + kvm_for_each_vcpu(i, v, kvm) + hv_tlb_flush_ring_enqueue(v); + kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH); } else { sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask); + for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) { + v = kvm_get_vcpu(kvm, i); + if (!v) + continue; + hv_tlb_flush_ring_enqueue(v); + } + kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask); } diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index da2737f2a956..6847caeaaf84 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -147,4 +147,17 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); + +static inline void kvm_hv_vcpu_empty_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + if (!hv_vcpu) + return; + + hv_vcpu->tlb_flush_ring.read_idx = hv_vcpu->tlb_flush_ring.write_idx; +} +void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu); + + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f633cff8cd7f..e5aec386d299 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3324,7 +3324,7 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu) static_call(kvm_x86_flush_tlb_all)(vcpu); } -static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) +void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) { ++vcpu->stat.tlb_flush; @@ -3362,7 +3362,8 @@ void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) { kvm_vcpu_flush_tlb_guest(vcpu); - kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu); + if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) + kvm_hv_vcpu_empty_flush_tlb(vcpu); } else if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) { kvm_vcpu_flush_tlb_guest(vcpu); } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 588792f00334..2324f496c500 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -58,6 +58,7 @@ static inline unsigned int __shrink_ple_window(unsigned int val, #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL +void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu); void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); int kvm_check_nested_events(struct kvm_vcpu *vcpu); -- 2.35.1