Hi Oliver, On Sat, Oct 26, 2024 at 7:53 AM Oliver Upton <oliver.upton@xxxxxxxxx> wrote: > > On Sat, Oct 26, 2024 at 08:43:21AM +0100, Marc Zyngier wrote: > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > index bf64fed9820e..c315bc1a4e9a 100644 > > --- a/arch/arm64/include/asm/kvm_host.h > > +++ b/arch/arm64/include/asm/kvm_host.h > > @@ -74,8 +74,6 @@ enum kvm_mode kvm_get_mode(void); > > static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; }; > > #endif > > > > -DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); > > - > > extern unsigned int __ro_after_init kvm_sve_max_vl; > > extern unsigned int __ro_after_init kvm_host_sve_max_vl; > > int __init kvm_arm_init_sve(void); > > diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c > > index 879982b1cc73..1215df590418 100644 > > --- a/arch/arm64/kvm/arch_timer.c > > +++ b/arch/arm64/kvm/arch_timer.c > > @@ -206,8 +206,7 @@ void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) > > > > static inline bool userspace_irqchip(struct kvm *kvm) > > { > > - return static_branch_unlikely(&userspace_irqchip_in_use) && > > - unlikely(!irqchip_in_kernel(kvm)); > > + return unlikely(!irqchip_in_kernel(kvm)); > > } > > > > static void soft_timer_start(struct hrtimer *hrt, u64 ns) > > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > > index 48cafb65d6ac..70ff9a20ef3a 100644 > > --- a/arch/arm64/kvm/arm.c > > +++ b/arch/arm64/kvm/arm.c > > @@ -69,7 +69,6 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); > > static bool vgic_present, kvm_arm_initialised; > > > > static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized); > > -DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); > > > > bool is_kvm_arm_initialised(void) > > { > > @@ -503,9 +502,6 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) > > > > void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > > { > > - if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm))) > > - static_branch_dec(&userspace_irqchip_in_use); > > - > > kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); > > kvm_timer_vcpu_terminate(vcpu); > > kvm_pmu_vcpu_destroy(vcpu); > > @@ -848,14 +844,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) > > return ret; > > } > > > > - if (!irqchip_in_kernel(kvm)) { > > - /* > > - * Tell the rest of the code that there are userspace irqchip > > - * VMs in the wild. > > - */ > > - static_branch_inc(&userspace_irqchip_in_use); > > - } > > - > > /* > > * Initialize traps for protected VMs. > > * NOTE: Move to run in EL2 directly, rather than via a hypercall, once > > @@ -1077,7 +1065,7 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) > > * state gets updated in kvm_timer_update_run and > > * kvm_pmu_update_run below). > > */ > > - if (static_branch_unlikely(&userspace_irqchip_in_use)) { > > + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { > > if (kvm_timer_should_notify_user(vcpu) || > > kvm_pmu_should_notify_user(vcpu)) { > > *ret = -EINTR; > > @@ -1199,7 +1187,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) > > vcpu->mode = OUTSIDE_GUEST_MODE; > > isb(); /* Ensure work in x_flush_hwstate is committed */ > > kvm_pmu_sync_hwstate(vcpu); > > - if (static_branch_unlikely(&userspace_irqchip_in_use)) > > + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) > > kvm_timer_sync_user(vcpu); > > kvm_vgic_sync_hwstate(vcpu); > > local_irq_enable(); > > @@ -1245,7 +1233,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) > > * we don't want vtimer interrupts to race with syncing the > > * timer virtual interrupt state. > > */ > > - if (static_branch_unlikely(&userspace_irqchip_in_use)) > > + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) > > kvm_timer_sync_user(vcpu); > > > > kvm_arch_vcpu_ctxsync_fp(vcpu); > > > > I think this would fix the problem you're seeing without changing the > > userspace view of an erroneous configuration. It would also pave the > > way for the complete removal of the interrupt notification to > > userspace, which I claim has no user and is just a shit idea. > > Yeah, looks like this ought to get it done. > > Even with a fix for this particular issue I do wonder if we should > categorically harden against late initialization failures and un-init > the vCPU (or bug VM, where necessary) to avoid dealing with half-baked > vCPUs/VMs across our UAPI surfaces. > > A sane userspace will probably crash when KVM_RUN returns EINVAL anyway. Thanks for the suggestion. Sure, I'll take another look at the possible things that we can uninitialize and try to re-spin the patch. Marc, If you feel userspace_irqchip_in_use is not necessary anymore, and as a quick fix to this issue, we can get rid of that independent of the un-init effort. Thank you. Raghavendra