Sean Christopherson <seanjc@xxxxxxxxxx> writes: > On Thu, Apr 14, 2022, Vitaly Kuznetsov wrote: >> To allow flushing individual GVAs instead of always flushing the whole >> VPID a per-vCPU structure to pass the requests is needed. Introduce a >> simple ring write-locked structure to hold two types of entries: >> individual GVA (GFN + up to 4095 following GFNs in the lower 12 bits) >> and 'flush all'. >> >> The queuing rule is: if there's not enough space on the ring to put >> the request and leave at least 1 entry for 'flush all' - put 'flush >> all' entry. >> >> The size of the ring is arbitrarily set to '16'. >> >> Note, kvm_hv_flush_tlb() only queues 'flush all' entries for now so >> there's very small functional change but the infrastructure is >> prepared to handle individual GVA flush requests. >> >> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> >> --- >> arch/x86/include/asm/kvm_host.h | 16 +++++++ >> arch/x86/kvm/hyperv.c | 83 +++++++++++++++++++++++++++++++++ >> arch/x86/kvm/hyperv.h | 13 ++++++ >> arch/x86/kvm/x86.c | 5 +- >> arch/x86/kvm/x86.h | 1 + >> 5 files changed, 116 insertions(+), 2 deletions(-) >> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >> index 1de3ad9308d8..b4dd2ff61658 100644 >> --- a/arch/x86/include/asm/kvm_host.h >> +++ b/arch/x86/include/asm/kvm_host.h >> @@ -578,6 +578,20 @@ struct kvm_vcpu_hv_synic { >> bool dont_zero_synic_pages; >> }; >> >> +#define KVM_HV_TLB_FLUSH_RING_SIZE (16) >> + >> +struct kvm_vcpu_hv_tlb_flush_entry { >> + u64 addr; > > "addr" is misleading, this is overloaded to be both the virtual address and the count. > I think we make it a moot point, but it led me astray in thinking we could use the > lower 12 bits for flags... until I realized those bits are already in use. > >> + u64 flush_all:1; >> + u64 pad:63; > > This is rather odd, why not just use a bool? My initial plan was to eventually put more flags here, i.e. 
there are two additional flags which we don't currently handle: HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES (as we don't actually look at HV_ADDRESS_SPACE_ID) and HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY. > But why even have a "flush_all" field, can't we just use a magic value > for write_idx to indicate "flush_all"? E.g. either an explicit #define > or -1. Sure, a magic value would do too and will allow us to make 'struct kvm_vcpu_hv_tlb_flush_entry' 8 bytes instead of 16 (for the time being, that is: if we later add HV_ADDRESS_SPACE_ID/additional flags, the net win is going to be zero). > > Writers set write_idx to -1 to indicate "flush all", vCPU/reader goes straight > to "flush all" if write_idx is -1/invalid. That way, future writes can simply do > nothing until read_idx == write_idx, and the vCPU/reader avoids unnecessary flushes > if there's a "flush all" pending and other valid entries in the ring. > > And it allows deferring the "flush all" until the ring is truly full (unless there's > an off-by-one / wraparound edge case I'm missing, which is likely...). Thanks for the patch! I am, however, going to look at Maxim's suggestion to use 'kfifo' to avoid all these uncertainties, funky locking etc. At first glance it has everything I need here. 
> > --- > arch/x86/include/asm/kvm_host.h | 8 +----- > arch/x86/kvm/hyperv.c | 47 +++++++++++++-------------------- > 2 files changed, 19 insertions(+), 36 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index b6b9a71a4591..bb45cc383ce4 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -605,16 +605,10 @@ enum hv_tlb_flush_rings { > HV_NR_TLB_FLUSH_RINGS, > }; > > -struct kvm_vcpu_hv_tlb_flush_entry { > - u64 addr; > - u64 flush_all:1; > - u64 pad:63; > -}; > - > struct kvm_vcpu_hv_tlb_flush_ring { > int read_idx, write_idx; > spinlock_t write_lock; > - struct kvm_vcpu_hv_tlb_flush_entry entries[KVM_HV_TLB_FLUSH_RING_SIZE]; > + u64 entries[KVM_HV_TLB_FLUSH_RING_SIZE]; > }; > > /* Hyper-V per vcpu emulation context */ > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > index 1d6927538bc7..56f06cf85282 100644 > --- a/arch/x86/kvm/hyperv.c > +++ b/arch/x86/kvm/hyperv.c > @@ -1837,10 +1837,13 @@ static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc > static inline int hv_tlb_flush_ring_free(struct kvm_vcpu_hv *hv_vcpu, > int read_idx, int write_idx) > { > + if (write_idx < 0) > + return 0; > + > if (write_idx >= read_idx) > - return KVM_HV_TLB_FLUSH_RING_SIZE - (write_idx - read_idx) - 1; > + return KVM_HV_TLB_FLUSH_RING_SIZE - (write_idx - read_idx); > > - return read_idx - write_idx - 1; > + return read_idx - write_idx; > } > > static void hv_tlb_flush_ring_enqueue(struct kvm_vcpu *vcpu, > @@ -1869,6 +1872,9 @@ static void hv_tlb_flush_ring_enqueue(struct kvm_vcpu *vcpu, > */ > write_idx = tlb_flush_ring->write_idx; > > + if (write_idx < 0 && read_idx == write_idx) > + read_idx = write_idx = 0; > + > ring_free = hv_tlb_flush_ring_free(hv_vcpu, read_idx, write_idx); > /* Full ring always contains 'flush all' entry */ > if (!ring_free) > @@ -1879,21 +1885,13 @@ static void hv_tlb_flush_ring_enqueue(struct kvm_vcpu *vcpu, > * entry in case 
another request comes in. In case there's not enough > * space, just put 'flush all' entry there. > */ > - if (!count || count >= ring_free - 1 || !entries) { > - tlb_flush_ring->entries[write_idx].addr = 0; > - tlb_flush_ring->entries[write_idx].flush_all = 1; > - /* > - * Advance write index only after filling in the entry to > - * synchronize with lockless reader. > - */ > - smp_wmb(); > - tlb_flush_ring->write_idx = (write_idx + 1) % KVM_HV_TLB_FLUSH_RING_SIZE; > + if (!count || count > ring_free - 1 || !entries) { > + tlb_flush_ring->write_idx = -1; > goto out_unlock; > } > > for (i = 0; i < count; i++) { > - tlb_flush_ring->entries[write_idx].addr = entries[i]; > - tlb_flush_ring->entries[write_idx].flush_all = 0; > + tlb_flush_ring->entries[write_idx] = entries[i]; > write_idx = (write_idx + 1) % KVM_HV_TLB_FLUSH_RING_SIZE; > } > /* > @@ -1911,7 +1909,6 @@ void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) > { > struct kvm_vcpu_hv_tlb_flush_ring *tlb_flush_ring; > struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); > - struct kvm_vcpu_hv_tlb_flush_entry *entry; > int read_idx, write_idx; > u64 address; > u32 count; > @@ -1940,26 +1937,18 @@ void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) > /* Pairs with smp_wmb() in hv_tlb_flush_ring_enqueue() */ > smp_rmb(); > > + if (write_idx < 0) { > + kvm_vcpu_flush_tlb_guest(vcpu); > + goto out_empty_ring; > + } > + > for (i = read_idx; i != write_idx; i = (i + 1) % KVM_HV_TLB_FLUSH_RING_SIZE) { > - entry = &tlb_flush_ring->entries[i]; > - > - if (entry->flush_all) > - goto out_flush_all; > - > - /* > - * Lower 12 bits of 'address' encode the number of additional > - * pages to flush. 
> - */ > - address = entry->addr & PAGE_MASK; > - count = (entry->addr & ~PAGE_MASK) + 1; > + address = tlb_flush_ring->entries[i] & PAGE_MASK; > + count = (tlb_flush_ring->entries[i] & ~PAGE_MASK) + 1; > for (j = 0; j < count; j++) > static_call(kvm_x86_flush_tlb_gva)(vcpu, address + j * PAGE_SIZE); > } > ++vcpu->stat.tlb_flush; > - goto out_empty_ring; > - > -out_flush_all: > - kvm_vcpu_flush_tlb_guest(vcpu); > > out_empty_ring: > tlb_flush_ring->read_idx = write_idx; > > base-commit: 62592c7c742ae78eb1f1005a63965ece19e6effe > -- > -- Vitaly