Move the vp_bitmap "allocation" that's needed to handle mismatched
vp_index values down into sparse_set_to_vcpu_mask() and drop
__always_inline from said helper.  The vp_bitmap mess is a detail that's
specific to the sparse translation and does not need to be exposed to
the caller.

The underlying motivation is to fudge around a compilation warning/error
when CONFIG_KASAN_STACK=y, which is selected (and can't be unselected)
by CONFIG_KASAN=y when compiling with gcc (clang/LLVM is a stack hog in
some cases, so KASAN_STACK is opt-in for clang).  KASAN_STACK adds a
redzone around every stack variable, which pushes the Hyper-V functions
over the default frame-size limit of 1024 bytes.  With
CONFIG_KVM_WERROR=y, that warning breaks the build.  Shuffling which
function is charged with allocating vp_bitmap gets all functions back
below the default limit.

Regarding the __always_inline, prior to commit f21dd494506a ("KVM: x86:
hyperv: optimize sparse VP set processing") the helper, then named
hv_vcpu_in_sparse_set(), was a tiny bit of code that effectively boiled
down to a handful of bit ops.  The __always_inline was understandable,
if not justifiable.  Since the aforementioned change,
sparse_set_to_vcpu_mask() is a chunky 350-450+ bytes of code without
KASAN=y, and balloons to 1100+ with KASAN=y.  In other words, it has no
business being forcefully inlined.

Reported-by: Ajay Garg <ajaygargnsit@xxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
Vitaly (and anyone else with extensive KVM + Hyper-V knowledge), it
would be really helpful to get better coverage of these flows in
kvm-unit-tests.  There's a smoke test for this in selftests, but it's
not really all that interesting.  It took me over an hour and a half
just to get a Linux guest to hit the relevant flows.  Most of that was
due to QEMU 5.1 bugs (it doesn't advertise the HYPERCALL MSR by default)
and Linux guest stupidity (the guest silently disables itself if said
MSR isn't available), but it was really annoying to have to go digging
through QEMU to figure out how to even enable features that are
extensive/critical enough to warrant their own tests.

/wave to the clang folks for the pattern match on the changelog ;-)
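In case it helps review, here's a rough, userspace-only sketch of the
sparse bank expansion the helper performs on its fast path.  The bank
and VP numbers are made up and none of the names below exist in the
kernel; it's purely illustrative:

  #include <stdint.h>
  #include <stdio.h>

  #define MAX_BANKS 64	/* one u64 bank covers 64 VP indexes */

  /* Expand a sparse VP set into a flat, per-bank bitmap of VP indexes. */
  static void expand_sparse_set(uint64_t valid_bank_mask,
  				const uint64_t *sparse_banks,
  				uint64_t *vp_bitmap)
  {
  	int bank, sbank = 0;

  	for (bank = 0; bank < MAX_BANKS; bank++) {
  		vp_bitmap[bank] = 0;
  		if (valid_bank_mask & (1ULL << bank))
  			vp_bitmap[bank] = sparse_banks[sbank++];
  	}
  }

  int main(void)
  {
  	/* Toy input: banks 0 and 2 are valid; VPs 1 and 5 in bank 0, VP 130 in bank 2. */
  	uint64_t valid_bank_mask = (1ULL << 0) | (1ULL << 2);
  	uint64_t sparse_banks[] = { (1ULL << 1) | (1ULL << 5), (1ULL << 2) };
  	uint64_t vp_bitmap[MAX_BANKS];
  	int vp;

  	expand_sparse_set(valid_bank_mask, sparse_banks, vp_bitmap);

  	for (vp = 0; vp < MAX_BANKS * 64; vp++)
  		if (vp_bitmap[vp / 64] & (1ULL << (vp % 64)))
  			printf("vp_index %d is in the set\n", vp);
  	return 0;
  }

When vp_index == vcpu_idx for every vCPU, that flat bitmap doubles as
the vcpu_mask, which is exactly the fast path the patch preserves.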

 arch/x86/kvm/hyperv.c | 57 +++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4f15c0165c05..80018cfab5c7 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1710,31 +1710,38 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
 	return kvm_hv_get_msr(vcpu, msr, pdata, host);
 }
 
-static __always_inline unsigned long *sparse_set_to_vcpu_mask(
-	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
-	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
+static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks,
+				    u64 valid_bank_mask, unsigned long *vcpu_mask)
 {
 	struct kvm_hv *hv = to_kvm_hv(kvm);
+	bool has_mismatch = atomic_read(&hv->num_mismatched_vp_indexes);
+	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
 	struct kvm_vcpu *vcpu;
 	int i, bank, sbank = 0;
+	u64 *bitmap;
 
-	memset(vp_bitmap, 0,
-	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
+	BUILD_BUG_ON(sizeof(vp_bitmap) >
+		     sizeof(*vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS));
+
+	/* If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly. */
+	if (likely(!has_mismatch))
+		bitmap = (u64 *)vcpu_mask;
+	else
+		bitmap = vp_bitmap;
+
+	memset(bitmap, 0, sizeof(vp_bitmap));
 	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
 			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
-		vp_bitmap[bank] = sparse_banks[sbank++];
+		bitmap[bank] = sparse_banks[sbank++];
 
-	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
-		/* for all vcpus vp_index == vcpu_idx */
-		return (unsigned long *)vp_bitmap;
-	}
+	if (likely(!has_mismatch))
+		return;
 
-	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
+	bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_bit(kvm_hv_get_vpindex(vcpu),
 			     (unsigned long *)vp_bitmap))
-			__set_bit(i, vcpu_bitmap);
+			__set_bit(i, vcpu_mask);
 	}
-	return vcpu_bitmap;
 }
 
 struct kvm_hv_hcall {
@@ -1756,9 +1763,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
-	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
-	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
-	unsigned long *vcpu_mask;
+	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
 	u64 valid_bank_mask;
 	u64 sparse_banks[64];
 	int sparse_banks_len;
@@ -1842,11 +1847,9 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	if (all_cpus) {
 		kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST);
 	} else {
-		vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
-						    vp_bitmap, vcpu_bitmap);
+		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
 
-		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
-					    vcpu_mask);
+		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, vcpu_mask);
 	}
 
 ret_success:
@@ -1879,9 +1882,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_send_ipi_ex send_ipi_ex;
 	struct hv_send_ipi send_ipi;
-	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
-	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
-	unsigned long *vcpu_mask;
+	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
 	unsigned long valid_bank_mask;
 	u64 sparse_banks[64];
 	int sparse_banks_len;
@@ -1937,11 +1938,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
 		return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
-	vcpu_mask = all_cpus ? NULL :
-		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
-					vp_bitmap, vcpu_bitmap);
+	if (all_cpus) {
+		kvm_send_ipi_to_many(kvm, vector, NULL);
+	} else {
+		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
 
-	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
+		kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
+	}
 
 ret_success:
 	return HV_STATUS_SUCCESS;
-- 
2.33.0.1079.g6e70778dc9-goog