When the VM boots, the CAS negotiation process determines which interrupt mode to use and invokes a machine reset. At that time, the previous KVM interrupt device is 'destroyed' before the chosen one is created. Upon destruction, the vCPU interrupt presenters using the KVM device should be cleared first, the machine will reconnect them later to the new device after it is created. When using the KVM device, there is still a race window with the early checks in kvmppc_native_connect_vcpu(). Yet to be fixed. Signed-off-by: Cédric Le Goater <clg@xxxxxxxx> --- arch/powerpc/kvm/book3s_xics.c | 19 +++++++++++++ arch/powerpc/kvm/book3s_xive.c | 39 +++++++++++++++++++++++++-- arch/powerpc/kvm/book3s_xive_native.c | 16 +++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index f27ee57ab46e..81cdabf4295f 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1342,6 +1342,25 @@ static void kvmppc_xics_free(struct kvm_device *dev) struct kvmppc_xics *xics = dev->private; int i; struct kvm *kvm = xics->kvm; + struct kvm_vcpu *vcpu; + + /* + * When destroying the VM, the vCPUs are destroyed first and + * the vCPU list should be empty. If this is not the case, + * then we are simply destroying the device and we should + * clean up the vCPU interrupt presenters first. + */ + if (atomic_read(&kvm->online_vcpus) != 0) { + /* + * call kick_all_cpus_sync() to ensure that all CPUs + * have executed any pending interrupts + */ + if (is_kvmppc_hv_enabled(kvm)) + kick_all_cpus_sync(); + + kvm_for_each_vcpu(i, vcpu, kvm) + kvmppc_xics_free_icp(vcpu); + } debugfs_remove(xics->dentry); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 7a14512b8944..0a1c11d6881c 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1105,11 +1105,19 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; - struct kvmppc_xive *xive = xc->xive; + struct kvmppc_xive *xive; int i; + if (!kvmppc_xics_enabled(vcpu)) + return; + + if (!xc) + return; + pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num); + xive = xc->xive; + /* Ensure no interrupt is still routed to that VP */ xc->valid = false; kvmppc_xive_disable_vcpu_interrupts(vcpu); @@ -1146,6 +1154,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) } /* Free the VP */ kfree(xc); + + /* Cleanup the vcpu */ + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; + vcpu->arch.xive_vcpu = NULL; } int kvmppc_xive_connect_vcpu(struct kvm_device *dev, @@ -1163,7 +1175,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, } if (xive->kvm != vcpu->kvm) return -EPERM; - if (vcpu->arch.irq_type) + if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { pr_devel("Duplicate !\n"); @@ -1833,8 +1845,31 @@ static void kvmppc_xive_free(struct kvm_device *dev) { struct kvmppc_xive *xive = dev->private; struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; int i; + /* + * When destroying the VM, the vCPUs are destroyed first and + * the vCPU list should be empty. If this is not the case, + * then we are simply destroying the device and we should + * clean up the vCPU interrupt presenters first. + */ + if (atomic_read(&kvm->online_vcpus) != 0) { + /* + * call kick_all_cpus_sync() to ensure that all CPUs + * have executed any pending interrupts + */ + if (is_kvmppc_hv_enabled(kvm)) + kick_all_cpus_sync(); + + /* + * TODO: There is still a race window with the early + * checks in kvmppc_native_connect_vcpu() + */ + kvm_for_each_vcpu(i, vcpu, kvm) + kvmppc_xive_cleanup_vcpu(vcpu); + } + debugfs_remove(xive->dentry); if (kvm) diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index bf60870144f1..c0655164d9af 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -909,8 +909,24 @@ static void kvmppc_xive_native_free(struct kvm_device *dev) { struct kvmppc_xive *xive = dev->private; struct kvm *kvm = xive->kvm; + struct kvm_vcpu *vcpu; int i; + /* + * When destroying the VM, the vCPUs are destroyed first and + * the vCPU list should be empty. If this is not the case, + * then we are simply destroying the device and we should + * clean up the vCPU interrupt presenters first. + */ + if (atomic_read(&kvm->online_vcpus) != 0) { + /* + * TODO: There is still a race window with the early + * checks in kvmppc_xive_native_connect_vcpu() + */ + kvm_for_each_vcpu(i, vcpu, kvm) + kvmppc_xive_native_cleanup_vcpu(vcpu); + } + debugfs_remove(xive->dentry); pr_devel("Destroying xive native device\n"); -- 2.20.1