Hi Ganapat,

On Wed, Aug 21, 2024 at 09:55:37AM +0530, Ganapatrao Kulkarni wrote:
> Have you tested/tried NV with host/L0 booted with GICv4.x enabled?
> We do see L2 boot hang and I don't have much debug info at the moment.

Sorry, I've been sitting on a fix for this that I've been meaning to
send out. The issue is that the vPE is marked as runnable
(its_vpe::pending_last = true) when it is descheduled without
requesting a doorbell IRQ.

Once KVM completes the nested ERET, it believes an IRQ is pending for
L1 (kvm_vgic_vcpu_pending_irq() returns true) and injects the nested
exception.

This can be papered over by requesting the doorbell IRQ, which we need
anyway to kick us out of the L2 when an IRQ becomes pending for L1.
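In case it helps, here's a tiny userspace model of the interaction
(not kernel code: struct vpe, vpe_deschedule(), and vcpu_pending_irq()
are made-up stand-ins for its_vpe, the ITS deschedule path, and
kvm_vgic_vcpu_pending_irq(), heavily simplified for illustration):

	#include <stdbool.h>
	#include <stdio.h>

	/* Simplified stand-in for the GICv4 vPE state (its_vpe). */
	struct vpe {
		bool resident;
		bool pending_last;
	};

	static void vpe_deschedule(struct vpe *vpe, bool request_doorbell)
	{
		vpe->resident = false;

		if (request_doorbell) {
			/* The doorbell IRQ will tell us when something is pending. */
			vpe->pending_last = false;
		} else {
			/* No doorbell: conservatively assume something is pending. */
			vpe->pending_last = true;
		}
	}

	/* Stand-in for the check KVM makes after the nested ERET. */
	static bool vcpu_pending_irq(const struct vpe *vpe)
	{
		return vpe->pending_last;
	}

	int main(void)
	{
		struct vpe vpe = { .resident = true, .pending_last = false };

		/* Current nested-ERET path: descheduled without a doorbell... */
		vpe_deschedule(&vpe, false);

		/* ...so this reports true and a spurious IRQ is injected into L1. */
		printf("IRQ pending for L1? %s\n",
		       vcpu_pending_irq(&vpe) ? "yes (spurious)" : "no");

		/* With the doorbell requested (as in the diff below), it doesn't. */
		vpe = (struct vpe){ .resident = true, .pending_last = false };
		vpe_deschedule(&vpe, true);
		printf("IRQ pending for L1? %s\n",
		       vcpu_pending_irq(&vpe) ? "yes (spurious)" : "no");
		return 0;
	}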
Could you take this diff for a spin?

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0ae093bae054..9d07184d79b1 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -613,6 +613,12 @@ struct cpu_sve_state {
  * field.
  */
 struct kvm_host_data {
+	/* SVE enabled for EL0 */
+#define HOST_SVE_ENABLED	0
+	/* SME enabled for EL0 */
+#define HOST_SME_ENABLED	1
+	unsigned long flags;
+
 	struct kvm_cpu_context host_ctxt;
 
 	/*
@@ -908,10 +914,8 @@ struct kvm_vcpu_arch {
 /* Save TRBE context if active */
 #define DEBUG_STATE_SAVE_TRBE	__vcpu_single_flag(iflags, BIT(6))
 
-/* SVE enabled for host EL0 */
-#define HOST_SVE_ENABLED	__vcpu_single_flag(sflags, BIT(0))
-/* SME enabled for EL0 */
-#define HOST_SME_ENABLED	__vcpu_single_flag(sflags, BIT(1))
+/* KVM is currently emulating a nested ERET */
+#define IN_NESTED_ERET		__vcpu_single_flag(sflags, BIT(0))
 /* Physical CPU not in supported_cpus */
 #define ON_UNSUPPORTED_CPU	__vcpu_single_flag(sflags, BIT(2))
 /* WFIT instruction trapped */
@@ -1294,6 +1298,10 @@ DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
 		&this_cpu_ptr_hyp_sym(kvm_host_data)->f)
 #endif
 
+#define host_data_set_flag(nr)		set_bit(nr, host_data_ptr(flags))
+#define host_data_test_flag(nr)		test_bit(nr, host_data_ptr(flags))
+#define host_data_clear_flag(nr)	clear_bit(nr, host_data_ptr(flags))
+
 /* Check whether the FP regs are owned by the guest */
 static inline bool guest_owns_fp_regs(void)
 {
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 05166eccea0a..fd3d6275b777 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2310,6 +2310,7 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
 	}
 
 	preempt_disable();
+	vcpu_set_flag(vcpu, IN_NESTED_ERET);
 	kvm_arch_vcpu_put(vcpu);
 
 	if (!esr_iss_is_eretax(esr))
@@ -2321,6 +2322,7 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
 	*vcpu_cpsr(vcpu) = spsr;
 
 	kvm_arch_vcpu_load(vcpu, smp_processor_id());
+	vcpu_clear_flag(vcpu, IN_NESTED_ERET);
 	preempt_enable();
 }
 
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index c53e5b14038d..f7712c89adef 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -64,14 +64,14 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
 	*host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
 
-	vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
+	host_data_clear_flag(HOST_SVE_ENABLED);
 	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
-		vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
+		host_data_set_flag(HOST_SVE_ENABLED);
 
 	if (system_supports_sme()) {
-		vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
+		host_data_clear_flag(HOST_SME_ENABLED);
 		if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
-			vcpu_set_flag(vcpu, HOST_SME_ENABLED);
+			host_data_set_flag(HOST_SME_ENABLED);
 
 		/*
 		 * If PSTATE.SM is enabled then save any pending FP
@@ -167,7 +167,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 	 */
 	if (has_vhe() && system_supports_sme()) {
 		/* Also restore EL0 state seen on entry */
-		if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
+		if (host_data_test_flag(HOST_SME_ENABLED))
 			sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_SMEN);
 		else
 			sysreg_clear_set(CPACR_EL1,
@@ -226,7 +226,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 	 * for EL0. To avoid spurious traps, restore the trap state
 	 * seen by kvm_arch_vcpu_load_fp():
 	 */
-	if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
+	if (host_data_test_flag(HOST_SVE_ENABLED))
 		sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
 	else
 		sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 74a67ad87f29..9f3f06ac76cc 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -336,6 +336,22 @@ void vgic_v4_teardown(struct kvm *kvm)
 	its_vm->vpes = NULL;
 }
 
+static inline bool vgic_v4_want_doorbell(struct kvm_vcpu *vcpu)
+{
+	if (vcpu_get_flag(vcpu, IN_WFI))
+		return true;
+
+	if (likely(!vcpu_has_nv(vcpu)))
+		return false;
+
+	/*
+	 * GICv4 hardware is only ever used for the L1. Mark the vPE (i.e. the
+	 * L1 context) nonresident and request a doorbell to kick us out of the
+	 * L2 when an IRQ becomes pending.
+	 */
+	return vcpu_get_flag(vcpu, IN_NESTED_ERET);
+}
+
 int vgic_v4_put(struct kvm_vcpu *vcpu)
 {
 	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
@@ -343,7 +359,7 @@ int vgic_v4_put(struct kvm_vcpu *vcpu)
 	if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
 		return 0;
 
-	return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI));
+	return its_make_vpe_non_resident(vpe, vgic_v4_want_doorbell(vcpu));
 }
 
 int vgic_v4_load(struct kvm_vcpu *vcpu)

--
Thanks,
Oliver