Secure AVIC hardware only accelerates self-IPIs. For a cross-vCPU IPI, the
source vCPU updates the APIC_IRR of the destination vCPU and then issues a
VMGEXIT with exit code SVM_EXIT_MSR to propagate the APIC_ICR write to the
hypervisor. The hypervisor examines the ICR data and either signals running
destination vCPUs using the AVIC Doorbell MSR or wakes up blocking vCPUs.
The hypervisor then resumes the vCPU which issued the VMGEXIT.

Co-developed-by: Kishon Vijay Abraham I <kvijayab@xxxxxxx>
Signed-off-by: Kishon Vijay Abraham I <kvijayab@xxxxxxx>
Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay@xxxxxxx>
---
 arch/x86/kvm/svm/sev.c | 216 ++++++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/svm/svm.h |   2 +
 2 files changed, 217 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 080b71ade88d..d8413c7f4832 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3496,6 +3496,89 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
 	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
 	unsigned int asid = sev_get_asid(svm->vcpu.kvm);
 
+	/*
+	 * It should be safe to clear sev_savic_has_pending_ipi here.
+	 *
+	 * The following scenarios are possible:
+	 *
+	 * Scenario 1: sev_savic_has_pending_ipi is set before the hlt exit
+	 * of the target vCPU.
+	 *
+	 *   Source vCPU                    Target vCPU
+	 *
+	 *   1. Set APIC_IRR of target
+	 *      vCPU.
+	 *
+	 *   2. VMGEXIT
+	 *
+	 *   3. Set ...has_pending_ipi
+	 *
+	 *      savic_handle_icr_write()
+	 *        ..._has_pending_ipi = true
+	 *
+	 *   4. avic_ring_doorbell()
+	 *                - VS -
+	 *                                  4. VMEXIT
+	 *
+	 *                                  5. ..._has_pending_ipi = false
+	 *
+	 *                                  6. VM entry
+	 *
+	 *                                  7. hlt exit
+	 *
+	 * In this case, any VM exit taken by the target vCPU before the hlt
+	 * exit clears sev_savic_has_pending_ipi. When hlt is executed, the
+	 * idle halt intercept logic finds V_INTR set and the hlt exit is
+	 * skipped.
+	 *
+	 * Scenario 2: sev_savic_has_pending_ipi is set when the target vCPU
+	 * has already taken a hlt exit.
+	 *
+	 *   Source vCPU                    Target vCPU
+	 *
+	 *                                  1. hlt exit
+	 *
+	 *   2. Set ...has_pending_ipi
+	 *                                  3. kvm_vcpu_has_events() returns
+	 *                                     true and the VM is reentered.
+	 *
+	 *                                     vcpu_block()
+	 *                                       kvm_arch_vcpu_runnable()
+	 *                                         kvm_vcpu_has_events()
+	 *                                           <returns true as
+	 *                                            ..._has_pending_ipi
+	 *                                            is set>
+	 *
+	 *                                  4. On VM entry, the APIC_IRR state
+	 *                                     is re-evaluated, V_INTR is set
+	 *                                     and the interrupt is delivered
+	 *                                     to the vCPU.
+	 *
+	 * Scenario 3: sev_savic_has_pending_ipi is set while the hlt exit is
+	 * happening:
+	 *
+	 *   Source vCPU                    Target vCPU
+	 *
+	 *                                  1. hlt
+	 *                                     Hardware checks V_INTR to
+	 *                                     determine whether the hlt exit
+	 *                                     needs to be taken. No other
+	 *                                     exit, such as an intr exit, can
+	 *                                     be taken while this sequence is
+	 *                                     being executed.
+	 *
+	 *   2. Set APIC_IRR of target vCPU.
+	 *
+	 *   3. Set ...has_pending_ipi
+	 *                                  4. hlt exit taken.
+	 *
+	 *                                  5. ...has_pending_ipi being set is
+	 *                                     observed by the target vCPU and
+	 *                                     the vCPU is resumed.
+	 *
+	 * In this scenario, hardware ensures that the target vCPU does not
+	 * take any exit between checking the V_INTR state and the hlt exit.
+	 * So, sev_savic_has_pending_ipi remains set when the vCPU takes the
+	 * hlt exit.
+	 */
+	if (READ_ONCE(svm->sev_savic_has_pending_ipi))
+		WRITE_ONCE(svm->sev_savic_has_pending_ipi, false);
+
 	/* Assign the asid allocated with this SEV guest */
 	svm->asid = asid;
 
@@ -4303,6 +4386,129 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 	return 0;
 }
 
+static void savic_handle_icr_write(struct kvm_vcpu *kvm_vcpu, u64 icr)
+{
+	struct kvm *kvm = kvm_vcpu->kvm;
+	struct kvm_vcpu *vcpu;
+	u32 icr_low, icr_high;
+	bool in_guest_mode;
+	unsigned long i;
+
+	icr_low = lower_32_bits(icr);
+	icr_high = upper_32_bits(icr);
+
+	/*
+	 * TODO: Instead of scanning all the vCPUs, get the fastpath working;
+	 * it should look similar to avic_kick_target_vcpus_fast().
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!kvm_apic_match_dest(vcpu, kvm_vcpu->arch.apic,
+					 icr_low & APIC_SHORT_MASK,
+					 icr_high, icr_low & APIC_DEST_MASK))
+			continue;
+
+		/*
+		 * Setting sev_savic_has_pending_ipi could result in a
+		 * spurious wakeup from hlt (as kvm_cpu_has_interrupt() would
+		 * return true) if the destination vCPU is inside the guest
+		 * and the guest does a halt exit after handling the IPI.
+		 * sev_savic_has_pending_ipi gets cleared on VM entry, so
+		 * there can be at most one spurious wakeup per IPI. For
+		 * vcpu->mode == IN_GUEST_MODE, sev_savic_has_pending_ipi
+		 * needs to be set to handle the case where the destination
+		 * vCPU has taken a halt exit and the source CPU has not yet
+		 * observed vcpu->mode != IN_GUEST_MODE.
+		 */
+		WRITE_ONCE(to_svm(vcpu)->sev_savic_has_pending_ipi, true);
+		/* Order the sev_savic_has_pending_ipi write and the vcpu->mode read. */
+		smp_mb();
+		/* Pairs with smp_store_release() in vcpu_enter_guest(). */
+		in_guest_mode = (smp_load_acquire(&vcpu->mode) == IN_GUEST_MODE);
+		if (in_guest_mode) {
+			/*
+			 * Signal the doorbell to tell hardware to inject the
+			 * IRQ.
+			 *
+			 * If the vCPU exits the guest before the doorbell
+			 * chimes, the memory ordering below guarantees that
+			 * the destination vCPU observes
+			 * sev_savic_has_pending_ipi == true before blocking:
+			 *
+			 * Src-CPU                      Dest-CPU
+			 *
+			 * savic_handle_icr_write()
+			 *   ..._has_pending_ipi = true
+			 *   smp_mb()
+			 *   smp_load_acquire(&vcpu->mode)
+			 *               - VS -
+			 *                              vcpu->mode = OUTSIDE_GUEST_MODE
+			 *                              __kvm_emulate_halt()
+			 *                                kvm_cpu_has_interrupt()
+			 *                                  smp_mb()
+			 *                                  if (..._has_pending_ipi)
+			 *                                          return true;
+			 *
+			 * [S1]
+			 * sev_savic_has_pending_ipi = true
+			 *
+			 * SMP_MB
+			 *
+			 * [L1]
+			 * vcpu->mode
+			 *                              [S2]
+			 *                              vcpu->mode = OUTSIDE_GUEST_MODE
+			 *
+			 *                              SMP_MB
+			 *
+			 *                              [L2] sev_savic_has_pending_ipi == true
+			 *
+			 * exists (L1=IN_GUEST_MODE /\ L2=false)
+			 *
+			 * The above condition does not exist. So, if the
+			 * source CPU observes vcpu->mode == IN_GUEST_MODE
+			 * (L1), the sev_savic_has_pending_ipi load by the
+			 * destination CPU (L2) observes the store (S1) from
+			 * the source CPU.
+			 */
+			avic_ring_doorbell(vcpu);
+		} else {
+			/*
+			 * Wake the vCPU if it was blocking.
+			 *
+			 * Memory ordering is provided by smp_mb() in
+			 * rcuwait_wake_up() on the source CPU and smp_mb() in
+			 * set_current_state() inside kvm_vcpu_block() on the
+			 * destination CPU.
+			 */
+			kvm_vcpu_kick(vcpu);
+		}
+	}
+}
+
+static bool savic_handle_msr_exit(struct kvm_vcpu *vcpu)
+{
+	u32 msr, reg;
+
+	msr = kvm_rcx_read(vcpu);
+	reg = (msr - APIC_BASE_MSR) << 4;
+
+	switch (reg) {
+	case APIC_ICR:
+		/*
+		 * Only an APIC_ICR wrmsr requires special handling for
+		 * Secure AVIC guests, to wake up destination vCPUs.
+		 */
+		if (to_svm(vcpu)->vmcb->control.exit_info_1) {
+			u64 data = kvm_read_edx_eax(vcpu);
+
+			savic_handle_icr_write(vcpu, data);
+			return true;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return false;
+}
+
 int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4445,6 +4651,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 		       control->exit_info_1, control->exit_info_2);
 		ret = -EINVAL;
 		break;
+	case SVM_EXIT_MSR:
+		if (sev_savic_active(vcpu->kvm) && savic_handle_msr_exit(vcpu))
+			return 1;
+
+		fallthrough;
 	default:
 		ret = svm_invoke_exit_handler(vcpu, exit_code);
 	}
@@ -5023,5 +5234,8 @@ void sev_savic_set_requested_irr(struct vcpu_svm *svm, bool reinjected)
 
 bool sev_savic_has_pending_interrupt(struct kvm_vcpu *vcpu)
 {
-	return kvm_apic_has_interrupt(vcpu) != -1;
+	/* See the memory ordering description in savic_handle_icr_write(). */
+	smp_mb();
+	return READ_ONCE(to_svm(vcpu)->sev_savic_has_pending_ipi) ||
+	       kvm_apic_has_interrupt(vcpu) != -1;
 }
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f70c161ad352..62e3581b7d31 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -327,6 +327,8 @@ struct vcpu_svm {
 
 	/* Guest GIF value, used when vGIF is not enabled */
 	bool guest_gif;
+
+	bool sev_savic_has_pending_ipi;
 };
 
 struct svm_cpu_data {
-- 
2.34.1
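
For reference, the guest-side half of the protocol described in the commit
message (set the destination's APIC_IRR in its backing page, then propagate
the ICR write via a VMGEXIT of type SVM_EXIT_MSR) could look roughly like the
sketch below. This is illustrative only, not part of the patch; the helpers
savic_backing_page() and savic_vmgexit_msr_write() are hypothetical stand-ins
for the actual guest Secure AVIC support code.

#include <stdint.h>

#define APIC_ICR_MSR	0x830	/* x2APIC ICR register as an MSR */
#define APIC_IRR_OFF	0x200	/* IRR base offset within the APIC page */

/* Hypothetical: return the destination vCPU's APIC backing page. */
extern uint8_t *savic_backing_page(uint32_t dest_apic_id);

/* Hypothetical: VMGEXIT with exit code SVM_EXIT_MSR for a wrmsr. */
extern void savic_vmgexit_msr_write(uint32_t msr, uint64_t value);

static void savic_send_ipi_sketch(uint32_t dest_apic_id, uint64_t icr)
{
	uint32_t vec = icr & 0xff;
	/* IRR is 8 x 32-bit registers, spaced 16 bytes apart. */
	uint32_t *irr = (uint32_t *)(savic_backing_page(dest_apic_id) +
				     APIC_IRR_OFF + (vec / 32) * 0x10);

	/* Step 1: mark the vector pending in the destination's APIC_IRR. */
	__atomic_fetch_or(irr, 1u << (vec % 32), __ATOMIC_SEQ_CST);

	/*
	 * Step 2: propagate the ICR write to the hypervisor. KVM handles it
	 * in sev_handle_vmgexit() -> savic_handle_msr_exit() above, ringing
	 * the AVIC doorbell for a running destination or kicking a blocked
	 * one, and then resumes this vCPU.
	 */
	savic_vmgexit_msr_write(APIC_ICR_MSR, icr);
}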
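
The ordering argument in savic_handle_icr_write() and
sev_savic_has_pending_interrupt() is the classic store-buffering pattern:
each side stores to one variable, executes a full barrier, then loads the
other variable, which forbids the outcome where both sides read the stale
value. A minimal standalone C11 rendering of that pattern follows (the
variable names are stand-ins; the kernel code uses WRITE_ONCE(), smp_mb()
and smp_load_acquire() rather than C11 atomics):

#include <stdatomic.h>
#include <stdbool.h>

#define OUTSIDE_GUEST_MODE	0
#define IN_GUEST_MODE		1

static atomic_bool has_pending_ipi;		/* sev_savic_has_pending_ipi */
static atomic_int vcpu_mode = IN_GUEST_MODE;	/* vcpu->mode */

/* Source CPU: [S1] store the flag; full fence; [L1] load the mode. */
static bool source_sees_guest_mode(void)
{
	atomic_store_explicit(&has_pending_ipi, true,
			      memory_order_relaxed);		/* S1 */
	atomic_thread_fence(memory_order_seq_cst);		/* smp_mb() */
	return atomic_load_explicit(&vcpu_mode,
			memory_order_relaxed) == IN_GUEST_MODE;	/* L1 */
}

/* Destination CPU: [S2] store the mode; full fence; [L2] load the flag. */
static bool dest_sees_pending_ipi(void)
{
	atomic_store_explicit(&vcpu_mode, OUTSIDE_GUEST_MODE,
			      memory_order_relaxed);		/* S2 */
	atomic_thread_fence(memory_order_seq_cst);		/* smp_mb() */
	return atomic_load_explicit(&has_pending_ipi,
				    memory_order_relaxed);	/* L2 */
}

/*
 * With both fences in place, the outcome
 *   source_sees_guest_mode() == true && dest_sees_pending_ipi() == false
 * is forbidden: either the source observes IN_GUEST_MODE and rings the
 * doorbell into the guest, or the destination observes the pending IPI
 * before blocking.
 */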