Get rid of the on-stack allocation of vcpu_mask and optimize kvm_hv_send_ipi() for a smaller number of vCPUs in the request. When Hyper-V TLB flush is in use, HvSendSyntheticClusterIpi{,Ex} calls are not commonly used to send IPIs to a large number of vCPUs (and are rarely used in general). Introduce hv_is_vp_in_sparse_set() to directly check if the specified VP_ID is present in the sparse vCPU set. Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> --- arch/x86/kvm/hyperv.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index d7671af8d754..c4b411cd7b00 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1741,6 +1741,28 @@ static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks, } } +static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_banks[]) +{ + int valid_bit_nr = vp_id / HV_VCPUS_PER_SPARSE_BANK; + unsigned long sbank; + + if (!test_bit(valid_bit_nr, (unsigned long *)&valid_bank_mask)) + return false; + + /* + * The index into the sparse bank is the number of preceding bits in + * the valid mask. Optimize for VMs with <64 vCPUs by skipping the + * fancy math if there can't possibly be preceding bits. 
+ */ + if (valid_bit_nr) + sbank = hweight64(valid_bank_mask & GENMASK_ULL(valid_bit_nr - 1, 0)); + else + sbank = 0; + + return test_bit(vp_id % HV_VCPUS_PER_SPARSE_BANK, + (unsigned long *)&sparse_banks[sbank]); +} + struct kvm_hv_hcall { u64 param; u64 ingpa; @@ -2023,8 +2045,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) ((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET); } -static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, - unsigned long *vcpu_bitmap) +static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector, + u64 *sparse_banks, u64 valid_bank_mask) { struct kvm_lapic_irq irq = { .delivery_mode = APIC_DM_FIXED, @@ -2034,7 +2056,10 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, unsigned long i; kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) + if (sparse_banks && + !hv_is_vp_in_sparse_set(kvm_hv_get_vpindex(vcpu), + valid_bank_mask, + sparse_banks)) continue; /* We fail only when APIC is disabled */ @@ -2047,7 +2072,6 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) struct kvm *kvm = vcpu->kvm; struct hv_send_ipi_ex send_ipi_ex; struct hv_send_ipi send_ipi; - DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); u64 valid_bank_mask; u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; u32 vector; @@ -2109,13 +2133,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return HV_STATUS_INVALID_HYPERCALL_INPUT; - if (all_cpus) { - kvm_send_ipi_to_many(kvm, vector, NULL); - } else { - sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask); - - kvm_send_ipi_to_many(kvm, vector, vcpu_mask); - } + kvm_hv_send_ipi_to_many(kvm, vector, all_cpus ? NULL : sparse_banks, valid_bank_mask); ret_success: return HV_STATUS_SUCCESS; -- 2.37.3