On Thu, Apr 14, 2022, Vitaly Kuznetsov wrote: > @@ -1862,15 +1890,58 @@ void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) > { > struct kvm_vcpu_hv_tlb_flush_ring *tlb_flush_ring; > struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); > + struct kvm_vcpu_hv_tlb_flush_entry *entry; > + int read_idx, write_idx; > + u64 address; > + u32 count; > + int i, j; > > - kvm_vcpu_flush_tlb_guest(vcpu); > - > - if (!hv_vcpu) > + if (!tdp_enabled || !hv_vcpu) { > + kvm_vcpu_flush_tlb_guest(vcpu); > return; > + } > > tlb_flush_ring = &hv_vcpu->tlb_flush_ring; > > - tlb_flush_ring->read_idx = tlb_flush_ring->write_idx; > + /* > + * TLB flush must be performed on the target vCPU so 'read_idx' > + * (AKA 'tail') cannot change underneath, the compiler is free > + * to re-read it. > + */ > + read_idx = tlb_flush_ring->read_idx; > + > + /* > + * 'write_idx' (AKA 'head') can be concurrently updated by a different > + * vCPU so we must be sure it's read once. > + */ > + write_idx = READ_ONCE(tlb_flush_ring->write_idx); > + > + /* Pairs with smp_wmb() in hv_tlb_flush_ring_enqueue() */ > + smp_rmb(); > + > + for (i = read_idx; i != write_idx; i = (i + 1) % KVM_HV_TLB_FLUSH_RING_SIZE) { > + entry = &tlb_flush_ring->entries[i]; > + > + if (entry->flush_all) > + goto out_flush_all; > + > + /* > + * Lower 12 bits of 'address' encode the number of additional > + * pages to flush. > + */ > + address = entry->addr & PAGE_MASK; > + count = (entry->addr & ~PAGE_MASK) + 1; > + for (j = 0; j < count; j++) > + static_call(kvm_x86_flush_tlb_gva)(vcpu, address + j * PAGE_SIZE); > + } > + ++vcpu->stat.tlb_flush; Bumping tlb_flush is inconsistent with how KVM handles INVLPG, and could be wrong if the ring is empty (might be impossible without a bug?). And if my math is right, or at least in the ballpark, tlb_flush will be incremented once regardless of whether the loop flushed 1 page or 64k pages (a completely full ring with the maximum count in every entry). 
I'd prefer to either drop the stat adjustment entirely, or bump invlpg in the loop, e.g. diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 56f06cf85282..5654c9d56289 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1945,10 +1945,11 @@ void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) for (i = read_idx; i != write_idx; i = (i + 1) % KVM_HV_TLB_FLUSH_RING_SIZE) { address = tlb_flush_ring->entries[i] & PAGE_MASK; count = (tlb_flush_ring->entries[i] & ~PAGE_MASK) + 1; - for (j = 0; j < count; j++) + for (j = 0; j < count; j++) { static_call(kvm_x86_flush_tlb_gva)(vcpu, address + j * PAGE_SIZE); + ++vcpu->stat.invlpg; + } } - ++vcpu->stat.tlb_flush; out_empty_ring: tlb_flush_ring->read_idx = write_idx; > + goto out_empty_ring; > + > +out_flush_all: > + kvm_vcpu_flush_tlb_guest(vcpu); > + > +out_empty_ring: > + tlb_flush_ring->read_idx = write_idx; > } >