Yuan Yao <yuan.yao@xxxxxxxxxxxxxxx> writes:

> On Wed, May 25, 2022 at 11:01:01AM +0200, Vitaly Kuznetsov wrote:
>> Currently, HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX} calls are handled
>> the exact same way as HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE{,EX}: by
>> flushing the whole VPID and this is sub-optimal. Switch to handling
>> these requests with 'flush_tlb_gva()' hooks instead. Use the newly
>> introduced TLB flush fifo to queue the requests.
>>
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
>> ---
>>  arch/x86/kvm/hyperv.c | 102 +++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 90 insertions(+), 12 deletions(-)
>>
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index 762b0b699fdf..576749973727 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -1806,32 +1806,84 @@ static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
>>  				  sparse_banks, consumed_xmm_halves, offset);
>>  }
>>
>> -static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu)
>> +static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[],
>> +					int consumed_xmm_halves, gpa_t offset)
>> +{
>> +	return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt,
>> +				  entries, consumed_xmm_halves, offset);
>> +}
>> +
>> +static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, u64 *entries, int count)
>>  {
>>  	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
>>  	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
>>  	u64 entry = KVM_HV_TLB_FLUSHALL_ENTRY;
>> +	unsigned long flags;
>>
>>  	if (!hv_vcpu)
>>  		return;
>>
>>  	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
>>
>> -	kfifo_in_spinlocked(&tlb_flush_fifo->entries, &entry, 1, &tlb_flush_fifo->write_lock);
>> +	spin_lock_irqsave(&tlb_flush_fifo->write_lock, flags);
>> +
>> +	/*
>> +	 * All entries should fit on the fifo leaving one free for 'flush all'
>> +	 * entry in case another request comes in. In case there's not enough
>> +	 * space, just put 'flush all' entry there.
>> +	 */
>> +	if (count && entries && count < kfifo_avail(&tlb_flush_fifo->entries)) {
>> +		WARN_ON(kfifo_in(&tlb_flush_fifo->entries, entries, count) != count);
>> +		goto out_unlock;
>> +	}
>> +
>> +	/*
>> +	 * Note: full fifo always contains 'flush all' entry, no need to check the
>> +	 * return value.
>> +	 */
>> +	kfifo_in(&tlb_flush_fifo->entries, &entry, 1);
>> +
>> +out_unlock:
>> +	spin_unlock_irqrestore(&tlb_flush_fifo->write_lock, flags);
>>  }
>>
>>  void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
>
> Where's the caller of this kvm_hv_vcpu_flush_tlb()?
> I didn't see the caller in patches 1-22 and the remaining ones are
> selftest patches; anything I missed?

No, I screwed up. It was present in kvm_service_local_tlb_flush_requests()
in previous versions but somehow disappeared. This also means that I
haven't tested this properly. Thanks for catching this!

v5 is coming to the rescue!
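For the record, the missing hunk is presumably just the hook-up in
kvm_service_local_tlb_flush_requests() (arch/x86/kvm/x86.c), along the
lines of the sketch below. It mirrors how previous revisions wired it
up, so treat it as an illustration rather than the final v5 code:

	/*
	 * Sketch only: assumes v5 restores the call site in
	 * kvm_service_local_tlb_flush_requests() the way previous
	 * revisions had it; the final placement may differ.
	 */
	void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
	{
		if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
			kvm_vcpu_flush_tlb_current(vcpu);

		if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
			kvm_vcpu_flush_tlb_guest(vcpu);

		/*
		 * Drain the per-vCPU TLB flush fifo filled by
		 * hv_tlb_flush_enqueue(); without this, entries queued by
		 * kvm_hv_flush_tlb() are never consumed.
		 */
		if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
			kvm_hv_vcpu_flush_tlb(vcpu);
	}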
>
>>  {
>>  	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
>>  	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
>> +	u64 entries[KVM_HV_TLB_FLUSH_FIFO_SIZE];
>> +	int i, j, count;
>> +	gva_t gva;
>>
>> -	kvm_vcpu_flush_tlb_guest(vcpu);
>> -
>> -	if (!hv_vcpu)
>> +	if (!tdp_enabled || !hv_vcpu) {
>> +		kvm_vcpu_flush_tlb_guest(vcpu);
>>  		return;
>> +	}
>>
>>  	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
>>
>> +	count = kfifo_out(&tlb_flush_fifo->entries, entries, KVM_HV_TLB_FLUSH_FIFO_SIZE);
>> +
>> +	for (i = 0; i < count; i++) {
>> +		if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY)
>> +			goto out_flush_all;
>> +
>> +		/*
>> +		 * Lower 12 bits of 'address' encode the number of additional
>> +		 * pages to flush.
>> +		 */
>> +		gva = entries[i] & PAGE_MASK;
>> +		for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++)
>> +			static_call(kvm_x86_flush_tlb_gva)(vcpu, gva + j * PAGE_SIZE);
>> +
>> +		++vcpu->stat.tlb_flush;
>> +	}
>> +	goto out_empty_ring;
>> +
>> +out_flush_all:
>> +	kvm_vcpu_flush_tlb_guest(vcpu);
>> +
>> +out_empty_ring:
>>  	kfifo_reset_out(&tlb_flush_fifo->entries);
>>  }
>>
>> @@ -1841,11 +1893,21 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>>  	struct hv_tlb_flush_ex flush_ex;
>>  	struct hv_tlb_flush flush;
>>  	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
>> +	/*
>> +	 * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE'
>> +	 * entries on the TLB flush fifo. The last entry, however, needs to be
>> +	 * always left free for 'flush all' entry which gets placed when
>> +	 * there is not enough space to put all the requested entries.
>> +	 */
>> +	u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_FIFO_SIZE - 1];
>> +	u64 *tlb_flush_entries;
>>  	u64 valid_bank_mask;
>>  	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
>>  	struct kvm_vcpu *v;
>>  	unsigned long i;
>>  	bool all_cpus;
>> +	int consumed_xmm_halves = 0;
>> +	gpa_t data_offset;
>>
>>  	/*
>>  	 * The Hyper-V TLFS doesn't allow more than 64 sparse banks, e.g. the
>> @@ -1861,10 +1923,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>>  		flush.address_space = hc->ingpa;
>>  		flush.flags = hc->outgpa;
>>  		flush.processor_mask = sse128_lo(hc->xmm[0]);
>> +		consumed_xmm_halves = 1;
>>  	} else {
>>  		if (unlikely(kvm_read_guest(kvm, hc->ingpa,
>>  					    &flush, sizeof(flush))))
>>  			return HV_STATUS_INVALID_HYPERCALL_INPUT;
>> +		data_offset = sizeof(flush);
>>  	}
>>
>>  	trace_kvm_hv_flush_tlb(flush.processor_mask,
>> @@ -1888,10 +1952,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>>  		flush_ex.flags = hc->outgpa;
>>  		memcpy(&flush_ex.hv_vp_set,
>>  		       &hc->xmm[0], sizeof(hc->xmm[0]));
>> +		consumed_xmm_halves = 2;
>>  	} else {
>>  		if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
>>  					    sizeof(flush_ex))))
>>  			return HV_STATUS_INVALID_HYPERCALL_INPUT;
>> +		data_offset = sizeof(flush_ex);
>>  	}
>>
>>  	trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
>> @@ -1907,25 +1973,37 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>>  		return HV_STATUS_INVALID_HYPERCALL_INPUT;
>>
>>  	if (all_cpus)
>> -		goto do_flush;
>> +		goto read_flush_entries;
>>
>>  	if (!hc->var_cnt)
>>  		goto ret_success;
>>
>> -	if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 2,
>> -				  offsetof(struct hv_tlb_flush_ex,
>> -					   hv_vp_set.bank_contents)))
>> +	if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, consumed_xmm_halves,
>> +				  data_offset))
>> +		return HV_STATUS_INVALID_HYPERCALL_INPUT;
>> +	data_offset += hc->var_cnt * sizeof(sparse_banks[0]);
>> +	consumed_xmm_halves += hc->var_cnt;
>> +	}
>> +
>> +read_flush_entries:
>> +	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE ||
>> +	    hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX ||
>> +	    hc->rep_cnt > ARRAY_SIZE(__tlb_flush_entries)) {
>> +		tlb_flush_entries = NULL;
>> +	} else {
>> +		if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries,
>> +						 consumed_xmm_halves, data_offset))
>>  			return HV_STATUS_INVALID_HYPERCALL_INPUT;
>> +		tlb_flush_entries = __tlb_flush_entries;
>>  	}
>>
>> -do_flush:
>>  	/*
>>  	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
>>  	 * analyze it here, flush TLB regardless of the specified address space.
>>  	 */
>>  	if (all_cpus) {
>>  		kvm_for_each_vcpu(i, v, kvm)
>> -			hv_tlb_flush_enqueue(v);
>> +			hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt);
>>
>>  		kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
>>  	} else {
>> @@ -1935,7 +2013,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>>  		v = kvm_get_vcpu(kvm, i);
>>  		if (!v)
>>  			continue;
>> -		hv_tlb_flush_enqueue(v);
>> +		hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt);
>>  	}
>>
>>  	kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
>> --
>> 2.35.3
>>
>

--
Vitaly
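P.S. For readers following along: the decode loop in
kvm_hv_vcpu_flush_tlb() above assumes each fifo entry packs a
page-aligned GVA in bits 63:12 and the number of *additional* pages in
bits 11:0, so a single u64 can request a flush of 1..4096 contiguous
pages. A minimal user-space sketch of that encoding;
encode_gva_flush_entry() is a hypothetical helper, not part of the
patch:

	/* Illustration only: the entry format consumed by the decode loop. */
	#include <assert.h>
	#include <stdint.h>

	#define GVA_PAGE_SHIFT	12
	#define GVA_PAGE_SIZE	(1ULL << GVA_PAGE_SHIFT)
	#define GVA_PAGE_MASK	(~(GVA_PAGE_SIZE - 1))

	/* Pack a flush request for 'nr_pages' (1..4096) contiguous pages at 'gva'. */
	static uint64_t encode_gva_flush_entry(uint64_t gva, unsigned int nr_pages)
	{
		assert(nr_pages >= 1 && nr_pages <= 4096);
		return (gva & GVA_PAGE_MASK) | (uint64_t)(nr_pages - 1);
	}

	int main(void)
	{
		/* GVA 0x7f0000000000, 3 pages total -> low 12 bits hold 2. */
		uint64_t entry = encode_gva_flush_entry(0x7f0000000000ULL, 3);

		/*
		 * The decode side recovers: gva = entry & PAGE_MASK,
		 * count = (entry & ~PAGE_MASK) + 1.
		 */
		assert((entry & GVA_PAGE_MASK) == 0x7f0000000000ULL);
		assert((entry & ~GVA_PAGE_MASK) + 1 == 3);
		return 0;
	}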