On Mon, Aug 27, 2018 at 06:48:58PM +0200, Vitaly Kuznetsov wrote: > Using a hypercall for sending IPIs is faster because it allows specifying > any number of vCPUs (even more than 64 with a sparse CPU set); the whole procedure > will take only one VMEXIT. > > Current Hyper-V TLFS (v5.0b) claims that HvCallSendSyntheticClusterIpi > hypercall can't be 'fast' (passing parameters through registers) but > apparently this is not true: Windows always uses it as 'fast', so we need > to support that. > > Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> > --- > Documentation/virtual/kvm/api.txt | 7 +++ > arch/x86/kvm/hyperv.c | 105 ++++++++++++++++++++++++++++++++++++++ > arch/x86/kvm/trace.h | 42 +++++++++++++++ > arch/x86/kvm/x86.c | 1 + > include/uapi/linux/kvm.h | 1 + > 5 files changed, 156 insertions(+) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index c664064f76fb..d6fb93f22c0b 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -4762,3 +4762,10 @@ CPU when the exception is taken. If this virtual SError is taken to EL1 using > AArch64, this value will be reported in the ISS field of ESR_ELx. > > See KVM_CAP_VCPU_EVENTS for more details. > +8.20 KVM_CAP_HYPERV_SEND_IPI > + > +Architectures: x86 > + > +This capability indicates that KVM supports paravirtualized Hyper-V IPI send > +hypercalls: > +HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx. 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > index d1a911132b59..dadec987a39b 100644 > --- a/arch/x86/kvm/hyperv.c > +++ b/arch/x86/kvm/hyperv.c > @@ -1360,6 +1360,97 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, > ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET); > } > > +static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa, > + bool ex, bool fast) > +{ > + struct kvm *kvm = current_vcpu->kvm; > + struct hv_send_ipi_ex send_ipi_ex; > + struct hv_send_ipi send_ipi; > + struct kvm_vcpu *vcpu; > + unsigned long valid_bank_mask; > + u64 sparse_banks[64]; > + int sparse_banks_len, bank, i; > + struct kvm_lapic_irq irq = {.delivery_mode = APIC_DM_FIXED}; > + bool all_cpus; > + > + if (!ex) { > + if (!fast) { > + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi, > + sizeof(send_ipi)))) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + sparse_banks[0] = send_ipi.cpu_mask; > + irq.vector = send_ipi.vector; > + } else { > + /* 'reserved' part of hv_send_ipi should be 0 */ > + if (unlikely(ingpa >> 32 != 0)) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + sparse_banks[0] = outgpa; > + irq.vector = (u32)ingpa; > + } > + all_cpus = false; > + valid_bank_mask = BIT_ULL(0); > + > + trace_kvm_hv_send_ipi(irq.vector, sparse_banks[0]); > + } else { > + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex, > + sizeof(send_ipi_ex)))) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + > + trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, > + send_ipi_ex.vp_set.format, > + send_ipi_ex.vp_set.valid_bank_mask); > + > + irq.vector = send_ipi_ex.vector; > + valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; > + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * > + sizeof(sparse_banks[0]); > + > + all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; > + > + if (!sparse_banks_len) > + goto ret_success; > + > + if (!all_cpus && > + kvm_read_guest(kvm, > + ingpa + offsetof(struct hv_send_ipi_ex, > + 
vp_set.bank_contents), > + sparse_banks, > + sparse_banks_len)) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + } > + > + if ((irq.vector < HV_IPI_LOW_VECTOR) || > + (irq.vector > HV_IPI_HIGH_VECTOR)) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + > + if (all_cpus) { > + kvm_for_each_vcpu(i, vcpu, kvm) { > + /* We fail only when APIC is disabled */ > + if (!kvm_apic_set_irq(vcpu, &irq, NULL)) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + } > + goto ret_success; > + } > + > + for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, 64) { > + for_each_set_bit(i, (unsigned long *)&sparse_banks[bank], 64) { > + u32 vp_index = bank * 64 + i; > + struct kvm_vcpu *vcpu = > + get_vcpu_by_vpidx(kvm, vp_index); > + > + /* Unknown vCPU specified */ > + if (!vcpu) > + continue; > + > + /* We fail only when APIC is disabled */ > + kvm_apic_set_irq(vcpu, &irq, NULL); > + } > + } > + > +ret_success: > + return HV_STATUS_SUCCESS; > +} > + I still think that splitting kvm_hv_send_ipi into three functions would make it more readable, but that's a matter of taste of course, so I'm OK if Radim insists otherwise. Reviewed-by: Roman Kagan <rkagan@xxxxxxxxxxxxx>