On Sat, 23 Jun 2018 at 03:14, Radim Krčmář <rkrcmar@xxxxxxxxxx> wrote: > > 2018-06-22 16:56+0200, Vitaly Kuznetsov: > > Using hypercall for sending IPIs is faster because this allows to specify > > any number of vCPUs (even > 64 with sparse CPU set), the whole procedure > > will take only one VMEXIT. > > > > Current Hyper-V TLFS (v5.0b) claims that HvCallSendSyntheticClusterIpi > > hypercall can't be 'fast' (passing parameters through registers) but > > apparently this is not true, Windows always uses it as 'fast' so we need > > to support that. > > > > Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> > > --- > > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > > @@ -1357,6 +1357,108 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, > > ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET); > > } > > > > +static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa, > > + bool ex, bool fast) > > +{ > > + struct kvm *kvm = current_vcpu->kvm; > > + struct hv_send_ipi_ex send_ipi_ex; > > + struct hv_send_ipi send_ipi; > > + struct kvm_vcpu *vcpu; > > + unsigned long valid_bank_mask = 0; > > + u64 sparse_banks[64]; > > + int sparse_banks_len, i; > > + struct kvm_lapic_irq irq = {0}; > > + bool all_cpus; > > + > > + if (!ex) { > > + if (!fast) { > > + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi, > > + sizeof(send_ipi)))) > > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + sparse_banks[0] = send_ipi.cpu_mask; > > + irq.vector = send_ipi.vector; > > + } else { > > + /* 'reserved' part of hv_send_ipi should be 0 */ > > + if (unlikely(ingpa >> 32 != 0)) > > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + sparse_banks[0] = outgpa; > > + irq.vector = (u32)ingpa; > > + } > > + all_cpus = false; > > + > > + trace_kvm_hv_send_ipi(irq.vector, sparse_banks[0]); > > + } else { > > + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex, > > + sizeof(send_ipi_ex)))) > > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + > > 
+ trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, > > + send_ipi_ex.vp_set.format, > > + send_ipi_ex.vp_set.valid_bank_mask); > > + > > + irq.vector = send_ipi_ex.vector; > > + valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; > > + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * > > + sizeof(sparse_banks[0]); > > + all_cpus = send_ipi_ex.vp_set.format != > > + HV_GENERIC_SET_SPARSE_4K; > > This would be much better readable as > > send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL > > And if Microsoft ever adds more formats, they won't be all VCPUs, so > we're future-proofing as well. > > > + > > + if (!sparse_banks_len) > > + goto ret_success; > > + > > + if (!all_cpus && > > + kvm_read_guest(kvm, > > + ingpa + offsetof(struct hv_send_ipi_ex, > > + vp_set.bank_contents), > > + sparse_banks, > > + sparse_banks_len)) > > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + } > > + > > + if ((irq.vector < HV_IPI_LOW_VECTOR) || > > + (irq.vector > HV_IPI_HIGH_VECTOR)) > > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + > > + irq.delivery_mode = APIC_DM_FIXED; > > I'd set this during variable definition. > > APIC_DM_FIXED is 0 anyway and the compiler probably won't optimize it > here due to function with side effects since definition. > > > + > > + kvm_for_each_vcpu(i, vcpu, kvm) { > > + struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; > > + int bank = hv->vp_index / 64, sbank = 0; > > + > > + if (!all_cpus) { > > + /* Banks >64 can't be represented */ > > + if (bank >= 64) > > + continue; > > + > > + /* Non-ex hypercalls can only address first 64 vCPUs */ > > + if (!ex && bank) > > + continue; > > + > > + if (ex) { > > + /* > > + * Check is the bank of this vCPU is in sparse > > + * set and get the sparse bank number. 
> > + */ > > + sbank = get_sparse_bank_no(valid_bank_mask, > > + bank); > > + > > + if (sbank < 0) > > + continue; > > + } > > + > > + if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64))) > > + continue; > > + } > > + > > + /* We fail only when APIC is disabled */ > > + if (!kvm_apic_set_irq(vcpu, &irq, NULL)) > > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > > Does Windows use this even for 1 VCPU IPI? > > I'm thinking we could apply the same optimization we do for LAPIC -- RCU > protected array that maps vp_index to vcpu. > > Thanks. > > > + } > > + > > +ret_success: > > + return HV_STATUS_SUCCESS; > > +} > > + > > bool kvm_hv_hypercall_enabled(struct kvm *kvm) > > { > > return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE; > > @@ -1526,6 +1628,20 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > > } > > ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); > > break; > > + case HVCALL_SEND_IPI: > > + if (unlikely(rep)) { > > + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > > + break; > > + } > > + ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast); > > + break; > > + case HVCALL_SEND_IPI_EX: Hi Paolo and Radim, I have already completed the patches for the Linux guest/KVM/QEMU with vCPUs <= 64. However, supporting vCPUs > 64 would require introducing extra complexity, like the "ex" variant in Hyper-V. Do you think vCPUs <= 64 is enough for Linux guests, or should I introduce two hypercalls, as Hyper-V does with its "ex" logic? Regards, Wanpeng Li