From: Wanpeng Li <wanpengli@xxxxxxxxxxx> Use a hypercall to send IPIs with a single vmexit, instead of one vmexit per IPI in xAPIC/x2APIC physical mode and one vmexit per cluster in x2APIC cluster mode. Even with QEMU interrupt remapping and PV TLB shootdown enabled, I still observe a ~14% performance boost in the ebizzy benchmark on a 64-vCPU VM, and the total number of MSR-induced vmexits drops by ~70%. Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx> Cc: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> Signed-off-by: Wanpeng Li <wanpengli@xxxxxxxxxxx> --- Documentation/virtual/kvm/cpuid.txt | 4 +++ Documentation/virtual/kvm/hypercalls.txt | 6 +++++ arch/x86/include/uapi/asm/kvm_para.h | 1 + arch/x86/kvm/cpuid.c | 3 ++- arch/x86/kvm/x86.c | 42 ++++++++++++++++++++++++++++++++ 5 files changed, 55 insertions(+), 1 deletion(-) diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index ab022dc..97ca194 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit || || can be enabled by setting bit 2 || || when writing to msr 0x4b564d02 ------------------------------------------------------------------------------ +KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit + || || before using paravirtualized + || || send IPIs. +------------------------------------------------------------------------------ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side || || per-cpu warps are expected in || || kvmclock. diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt index a890529..a771ee8 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virtual/kvm/hypercalls.txt @@ -121,3 +121,9 @@ compute the CLOCK_REALTIME for its clock, at the same instant. 
Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource, or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK. + +6. KVM_HC_SEND_IPI +------------------------ +Architecture: x86 +Status: active +Purpose: Hypercall used to send IPIs. diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 0ede697..19980ec 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -28,6 +28,7 @@ #define KVM_FEATURE_PV_UNHALT 7 #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 +#define KVM_FEATURE_PV_SEND_IPI 11 #define KVM_HINTS_REALTIME 0 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7e042e3..7bcfa61 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | (1 << KVM_FEATURE_PV_UNHALT) | (1 << KVM_FEATURE_PV_TLB_FLUSH) | - (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); + (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | + (1 << KVM_FEATURE_PV_SEND_IPI); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0046aa7..c2cef21 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6689,6 +6689,45 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } +/* + * Return 0 if successfully added and 1 if discarded. 
+ * Bit N of @ipi_bitmap_low selects phys_map[N] and bit N of @ipi_bitmap_high
+ * selects phys_map[N + BITS_PER_LONG]; @icr supplies vector and delivery mode.
+ */
+static int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+		unsigned long ipi_bitmap_high, unsigned long icr)
+{
+	unsigned long ipi_bitmap[2] = { ipi_bitmap_low, ipi_bitmap_high };
+	struct kvm_lapic_irq irq = {0};
+	struct kvm_apic_map *map;
+	int i, ret = 0;
+
+	/* A vector equal to NMI_VECTOR leaves irq.vector at 0. */
+	if ((icr & APIC_VECTOR_MASK) != NMI_VECTOR)
+		irq.vector = icr & APIC_VECTOR_MASK;
+	irq.delivery_mode = icr & APIC_MODE_MASK;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+	if (!map)
+		goto out;
+	for_each_set_bit(i, ipi_bitmap, 2 * BITS_PER_LONG) {
+		/* Bitmap comes from the guest: stay inside phys_map[]. */
+		if (i > map->max_apic_id)
+			break;
+		/* Skip empty phys_map[] slots (nothing to deliver to). */
+		if (!map->phys_map[i])
+			continue;
+		if (!kvm_apic_set_irq(map->phys_map[i]->vcpu, &irq, NULL)) {
+			ret = 1;
+			break;
+		}
+	}
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
 void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.apicv_active = false;
@@ -6737,6 +6776,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_CLOCK_PAIRING:
 		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
 		break;
+	case KVM_HC_SEND_IPI:
+		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2);
+		break;
 #endif
 	default:
 		ret = -KVM_ENOSYS;
-- 
2.7.4